Mercurial > illumos > illumos-gate
changeset 3445:cd5b6c3b1817
PSARC/2006/061 xmemfs EOF
6379911 Remove support for xmemfs from Solaris
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deleted_files/usr/src/cmd/fs.d/xmemfs/Makefile Thu Jan 18 16:23:02 2007 -0800 @@ -0,0 +1,37 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +#ident "%Z%%M% %I% %E% SMI" +# +# Copyright 2003 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +FSTYPE= xmemfs +LIBPROG= mount + +include ../Makefile.fstype +include ../Makefile.mount + +CPPFLAGS += -I../../../uts/intel -I../../../uts/i86pc + +include ../Makefile.mount.targ
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deleted_files/usr/src/cmd/fs.d/xmemfs/mount.c Thu Jan 18 16:23:02 2007 -0800 @@ -0,0 +1,330 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <stdio.h> +#include <signal.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <sys/mntent.h> +#include <sys/mnttab.h> +#include <sys/mntent.h> +#include <sys/mount.h> +#include <sys/fs/xmem.h> +#include <sys/types.h> +#include <locale.h> +#include <sys/stat.h> +#include <sys/statvfs.h> +#include <fslib.h> +#include <stdlib.h> + +enum { + FSSIZE, + VERBOSE, + LARGEBSIZE, +#ifdef DEBUG + NOLARGEBSIZE, + BSIZE, + RESERVEMEM, + NORESERVEMEM, +#endif + XOPTSZ +}; + +static char *myopts[] = { + "size", /* required */ + "vb", + "largebsize", +#ifdef DEBUG + "nolargebsize", /* default */ + "bsize", /* internal use only */ + "reservemem", /* default */ + "noreservemem", +#endif + NULL +}; + +static offset_t +atosz(char *optarg) +{ + offset_t off; + char *endptr; + + off = strtoll(optarg, &endptr, 0); + + switch (*endptr) { + case 't': case 'T': + off *= 1024; + /* FALLTHROUGH */ + case 'g': case 'G': + off *= 1024; + /* FALLTHROUGH */ + case 'm': case 'M': + off *= 1024; + /* FALLTHROUGH */ + case 'k': case 'K': + off *= 1024; + /* FALLTHROUGH */ + default: + break; + } + return (off); +} + + +int +main(int argc, char *argv[]) +{ + struct mnttab mnt; + int c; + char *myname; + char optbuf[MAX_MNTOPT_STR]; + char typename[64]; + char *options, *value; + int error = 0; + int verbose = 0; + int nmflg = 0; + offset_t fssize = 0; + offset_t bsize = 0; + int optsize = sizeof (struct xmemfs_args); + int mflg = 0; + int optcnt = 0; + int qflg = 0; + char *saveopt; + struct xmemfs_args xargs = { + 0, /* xa_fssize - file system sz */ + 0, /* xa_bsize - blk sz */ + XARGS_RESERVEMEM /* xa_flags */ + }; + + (void) setlocale(LC_ALL, ""); + +#if !defined(TEXT_DOMAIN) +#define TEXT_DOMAIN "SYS_TEST" +#endif + (void) textdomain(TEXT_DOMAIN); + + myname = strrchr(argv[0], '/'); + myname = myname ? 
myname + 1 : argv[0]; + (void) snprintf(typename, sizeof (typename), "%s_%s", MNTTYPE_XMEMFS, + myname); + argv[0] = typename; + + /* RO xmemfs not supported... */ + (void) strlcpy(optbuf, "rw", sizeof (optbuf)); + + while ((c = getopt(argc, argv, "Vqo:mO")) != EOF) { + switch (c) { + case 'q': + qflg++; + break; + case 'V': + verbose++; + break; + case 'm': + nmflg++; + mflg |= MS_NOMNTTAB; + break; + case 'O': + mflg |= MS_OVERLAY; + break; + case 'o': + options = optarg; + while (*options != '\0') { + saveopt = options; + + switch (getsubopt(&options, myopts, &value)) { + case LARGEBSIZE: + xargs.xa_flags |= XARGS_LARGEPAGES; + break; + case FSSIZE: + if (value) { + fssize = atosz(value); + if (!fssize) { + (void) fprintf(stderr, +gettext("%s: value %s for option \"%s\" is invalid\n"), +typename, value, myopts[FSSIZE]); + error++; + break; + } + xargs.xa_fssize = fssize; + optcnt++; + if (verbose) + (void) fprintf(stderr, +gettext("setting fssize to %d\n"), fssize); + } else { + (void) fprintf(stderr, +gettext("%s: option \"%s\" requires value\n"), typename, myopts[FSSIZE]); + error++; + } + break; +#ifdef DEBUG + case RESERVEMEM: + xargs.xa_flags |= XARGS_RESERVEMEM; + break; + case NORESERVEMEM: + xargs.xa_flags &= ~XARGS_RESERVEMEM; + break; + case NOLARGEBSIZE: + xargs.xa_flags &= ~XARGS_LARGEPAGES; + break; + case BSIZE: /* file system block size */ + if (value) { + bsize = atosz(value); + if (!bsize) { + (void) fprintf(stderr, +gettext("%s: value %s for option \"%s\" is invalid\n"), +typename, value, myopts[FSSIZE]); + error++; + break; + } + xargs.xa_bsize = bsize; + optcnt++; + if (verbose) + (void) fprintf(stderr, +gettext("setting bsize to %d\n"), bsize); + } else { + (void) fprintf(stderr, +gettext("%s: option \"%s\" requires value\n"), typename, myopts[BSIZE]); + error++; + } + break; +#endif + + case VERBOSE: + verbose++; + break; + default: + if (fsisstdopt(saveopt)) { + (void) strlcat(optbuf, ",", + sizeof (optbuf)); + (void) strlcat(optbuf, + 
saveopt, sizeof (optbuf)); + break; + } + if (!qflg) { + (void) fprintf(stderr, gettext( + "%s: WARNING: ignoring " + "option \"%s\"\n"), + typename, saveopt); + } + + break; + } + } + if (bsize) { + (void) snprintf(optbuf, sizeof (optbuf), + "%s,bsize=%lld", optbuf, bsize); + if (--optcnt) + (void) strlcat(optbuf, ",", + sizeof (optbuf)); + if (verbose) + (void) fprintf(stderr, "optbuf:%s\n", + optbuf); + } + if (fssize) { + (void) snprintf(optbuf, sizeof (optbuf), + "%s,size=%lld", optbuf, fssize); + if (--optcnt) + (void) strlcat(optbuf, ",", + sizeof (optbuf)); + if (verbose) + (void) fprintf(stderr, "optbuf:%s\n", + optbuf); + } else { + error++; + } + if (options[0] && !error) { + (void) strlcat(optbuf, options, + sizeof (optbuf)); + if (verbose) + (void) fprintf(stderr, "optbuf:%s\n", + optbuf); + } + if (verbose) + (void) fprintf(stderr, "optsize:%d optbuf:%s\n", + optsize, optbuf); + break; + default: + error++; + break; + } + } + + if (verbose && !error) { + char *optptr; + + (void) fprintf(stderr, "%s", typename); + for (optcnt = 1; optcnt < argc; optcnt++) { + optptr = argv[optcnt]; + if (optptr) + (void) fprintf(stderr, " %s", optptr); + } + (void) fprintf(stderr, "\n"); + } + + if (argc - optind != 2 || error) { + (void) fprintf(stderr, + gettext("Usage: %s -o[largebsize,]size=sz" + " xmem mount_point\n"), typename); + exit(1); + } + + mnt.mnt_special = argv[optind++]; + mnt.mnt_mountp = argv[optind++]; + mnt.mnt_fstype = MNTTYPE_XMEMFS; + mflg |= MS_DATA | MS_OPTIONSTR; + mnt.mnt_mntopts = optbuf; + + saveopt = strdup(optbuf); + + if (verbose) { + (void) fprintf(stderr, "mount(%s, \"%s\", %d, %s", + mnt.mnt_special, mnt.mnt_mountp, mflg, MNTTYPE_XMEMFS); + if (optsize) + (void) fprintf(stderr, ", \"%s\", %d)\n", + optbuf, strlen(optbuf)); + else + (void) fprintf(stderr, ")\n"); + } + if (mount(mnt.mnt_special, mnt.mnt_mountp, mflg, MNTTYPE_XMEMFS, + &xargs, optsize, optbuf, MAX_MNTOPT_STR)) { + if (errno == EBUSY) + (void) fprintf(stderr, + 
gettext("mount: %s already mounted\n"), + mnt.mnt_mountp); + else + perror("mount"); + exit(1); + } + + if (!qflg && saveopt != NULL) + cmp_requested_to_actual_options(saveopt, optbuf, + mnt.mnt_special, mnt.mnt_mountp); + + return (0); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deleted_files/usr/src/uts/intel/fs/xmemfs/seg_xmem.c Thu Jan 18 16:23:02 2007 -0800 @@ -0,0 +1,823 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +/* + * Portions of this source code were derived from Berkeley 4.3 BSD + * under license from the Regents of the University of California. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * The segxmem driver is used by the xmemfs to get faster (than seg_map) + * mappings [lower routine overhead] to random vnode/offsets. + * Mappings are made to a very limited kernel address range and to a + * potentially much larger user address range. It is the speed of mmap + * and munmaps to the user address space that we are concerned with. 
+ * We also need to ensure very low overhead for I/O similar to seg_spt + */ + +#include <sys/types.h> +#include <sys/t_lock.h> +#include <sys/param.h> +#include <sys/sysmacros.h> +#include <sys/buf.h> +#include <sys/systm.h> +#include <sys/vnode.h> +#include <sys/mman.h> +#include <sys/errno.h> +#include <sys/cred.h> +#include <sys/kmem.h> +#include <sys/vtrace.h> +#include <sys/cmn_err.h> +#include <sys/debug.h> +#include <sys/thread.h> +#include <sys/dumphdr.h> +#include <sys/map.h> +#include <sys/atomic.h> + +#include <vm/seg_kmem.h> +#include <vm/seg_vn.h> +#include <vm/hat.h> +#include <vm/as.h> +#include <vm/seg.h> +#include <vm/page.h> +#include <vm/pvn.h> +#include <vm/rm.h> +#include <sys/vfs.h> +#include <sys/fs/seg_xmem.h> +#include <sys/fs/xmem.h> +#include <sys/lgrp.h> + +/* + * Private seg op routines. + */ +static void segxmem_free(struct seg *seg); +static int segxmem_dup(struct seg *seg, struct seg *newseg); +static int segxmem_unmap(struct seg *seg, caddr_t raddr, size_t ssize); +static faultcode_t segxmem_fault(struct hat *hat, struct seg *seg, caddr_t addr, + size_t len, enum fault_type type, enum seg_rw rw); +static int segxmem_setprot(struct seg *seg, caddr_t addr, size_t len, + uint_t prot); +static int segxmem_checkprot(struct seg *seg, caddr_t addr, size_t len, + uint_t prot); +static size_t segxmem_incore(struct seg *seg, caddr_t addr, size_t len, + register char *vec); +static int segxmem_sync(struct seg *seg, register caddr_t addr, size_t len, + int attr, uint_t flags); +static int segxmem_lockop(struct seg *seg, caddr_t addr, size_t len, + int attr, int op, ulong_t *lockmap, size_t pos); +static int segxmem_getprot(struct seg *seg, caddr_t addr, size_t len, + uint_t *protv); +static u_offset_t segxmem_getoffset(struct seg *seg, caddr_t addr); +static int segxmem_gettype(struct seg *seg, caddr_t addr); +static int segxmem_getvp(struct seg *, caddr_t, struct vnode **); +static int segxmem_advise(struct seg *seg, caddr_t addr, size_t len, 
+ uint_t behav); +static void segxmem_dump(struct seg *seg); +static int segxmem_pagelock(struct seg *seg, caddr_t addr, size_t len, + struct page ***ppp, enum lock_type type, + enum seg_rw rw); +static int segxmem_setpgsz(struct seg *, caddr_t, size_t, uint_t); +static int segxmem_getmemid(struct seg *, caddr_t, memid_t *); + +#define SEGXMEM_NULLOP(t) (t(*)())NULL + +static struct seg_ops segxmem_ops = { + segxmem_dup, /* dup */ + segxmem_unmap, + segxmem_free, + segxmem_fault, /* Change if HAT_DYNAMIC_ISM_UNMAP suported */ + SEGXMEM_NULLOP(int), /* faulta */ + segxmem_setprot, + segxmem_checkprot, + SEGXMEM_NULLOP(int), /* kluster */ + SEGXMEM_NULLOP(size_t), /* swapout */ + segxmem_sync, /* sync */ + segxmem_incore, /* incore */ + segxmem_lockop, /* lockop */ + segxmem_getprot, + segxmem_getoffset, + segxmem_gettype, + segxmem_getvp, + segxmem_advise, /* advise */ + segxmem_dump, + segxmem_pagelock, /* pagelock */ + segxmem_setpgsz, + segxmem_getmemid, /* getmemid */ + SEGXMEM_NULLOP(lgrp_mem_policy_info_t *), /* getpolicy */ +}; + + +/* + * Statistics for segxmem operations. + * + * No explicit locking to protect these stats. 
+ */ +struct segxmemcnt segxmemcnt = { + { "fault", KSTAT_DATA_ULONG }, + { "getmap", KSTAT_DATA_ULONG }, + { "pagecreate", KSTAT_DATA_ULONG } +}; + +kstat_named_t *segxmemcnt_ptr = (kstat_named_t *)&segxmemcnt; +uint_t segxmemcnt_ndata = sizeof (segxmemcnt) / sizeof (kstat_named_t); + + +int segxmem_DR = -1; /* Indicate if hat supports DR */ + +int remap_broken = 0; + + +int +segxmem_create(struct seg *seg, struct segxmem_crargs *xmem_a) +{ + struct segxmem_data *sxd; + uint_t prot; + caddr_t taddr; + uint_t blocknumber, lastblock; + page_t ***ppa; + struct hat *hat; + size_t tlen; + + ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock)); + + if (((uintptr_t)seg->s_base | seg->s_size) & PAGEOFFSET) + panic("segxmem not PAGESIZE aligned"); + + sxd = kmem_zalloc(sizeof (struct segxmem_data), KM_SLEEP); + + seg->s_data = (void *)sxd; + seg->s_ops = &segxmem_ops; + + sxd->sxd_prot = xmem_a->xma_prot; + sxd->sxd_vp = xmem_a->xma_vp; + sxd->sxd_offset = xmem_a->xma_offset; + sxd->sxd_bshift = xmem_a->xma_bshift; + sxd->sxd_bsize = 1 << xmem_a->xma_bshift; + + blocknumber = 0; + lastblock = (seg->s_size - 1) >> sxd->sxd_bshift; + taddr = seg->s_base; + tlen = sxd->sxd_bsize; + ppa = xmem_a->xma_ppa; + hat = seg->s_as->a_hat; + prot = xmem_a->xma_prot; + while (blocknumber <= lastblock) { + page_t **ppp; + + if (VTOXM(sxd->sxd_vp)->xm_ppb == 1) + ppp = (page_t **)ppa; + else + ppp = *ppa; + + hat_memload_array(hat, taddr, tlen, ppp, prot | HAT_NOSYNC, + HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST); + + blocknumber++; + ppa++; + taddr += tlen; + } + + return (0); +} + +static void +segxmem_free(seg) + struct seg *seg; +{ + struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; + ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock)); + kmem_free(sxd, sizeof (struct segxmem_data)); + +} + +static int +segxmem_dup(struct seg *seg, struct seg *newseg) +{ + struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; + struct segxmem_data *newsxd; + caddr_t 
vaddr; + ulong_t pfn; + page_t *pp, **ppa; + int i; + int ppb; + + newsxd = kmem_zalloc(sizeof (struct segxmem_data), KM_SLEEP); + + newsxd->sxd_vp = sxd->sxd_vp; + newsxd->sxd_offset = sxd->sxd_offset; + newsxd->sxd_bsize = sxd->sxd_bsize; + newsxd->sxd_bshift = sxd->sxd_bshift; + newsxd->sxd_prot = sxd->sxd_prot; + + newsxd->sxd_softlockcnt = sxd->sxd_softlockcnt; + + newseg->s_ops = &segxmem_ops; + newseg->s_data = (void *)newsxd; + + ppb = btop(sxd->sxd_bsize); + if (ppb > 1) + ppa = kmem_alloc(ppb * sizeof (page_t *), KM_SLEEP); + else + ppa = &pp; + + for (vaddr = seg->s_base; vaddr < seg->s_base + seg->s_size; + vaddr += sxd->sxd_bsize) { + + /* ### sxd->sxd_vp->xn_ppa[(vaddr - s_base)]->p_pagenum */ + + pfn = hat_getpfnum(seg->s_as->a_hat, vaddr); + + if (pfn == PFN_INVALID) + continue; + + for (i = 0; i < ppb; i++) { + ppa[i] = page_numtopp_nolock(pfn); + pfn++; + } + hat_memload_array(newseg->s_as->a_hat, vaddr, sxd->sxd_bsize, + ppa, sxd->sxd_prot | HAT_NOSYNC, + HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST); + } + if (ppb > 1) + kmem_free(ppa, ppb * sizeof (page_t *)); + + return (0); +} + +/* + * This routine is called via a machine specific fault handling + * routine. It is also called by software routines wishing to + * lock or unlock a range of addresses. + */ +static faultcode_t +segxmem_fault( + struct hat *hat, + struct seg *seg, + caddr_t addr, + size_t len, + enum fault_type type, + enum seg_rw rw) +{ + struct segxmem_data *sxd; + size_t npages = btopr(len); + +#ifdef lint + hat = hat; + addr = addr; +#endif + + ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); + + sxd = (struct segxmem_data *)seg->s_data; + + ASSERT(addr >= seg->s_base); + ASSERT(((addr + len) - seg->s_base) <= seg->s_size); + + switch (type) { + + case F_SOFTLOCK: + + /* + * Because we know that every shared memory is + * already locked and called in the same context. 
+ */ + atomic_add_long(&sxd->sxd_softlockcnt, npages); + return (0); + + case F_SOFTUNLOCK: + + atomic_add_long(&sxd->sxd_softlockcnt, -npages); + + /* + * Check for softlock + */ + if (sxd->sxd_softlockcnt == 0) { + /* + * All SOFTLOCKS are gone. Wakeup any waiting + * unmappers so they can try again to unmap. + * As an optimization check for waiters first + * without the mutex held, so we're not always + * grabbing it on softunlocks. + */ + if (AS_ISUNMAPWAIT(seg->s_as)) { + mutex_enter(&seg->s_as->a_contents); + if (AS_ISUNMAPWAIT(seg->s_as)) { + AS_CLRUNMAPWAIT(seg->s_as); + cv_broadcast(&seg->s_as->a_cv); + } + mutex_exit(&seg->s_as->a_contents); + } + } + return (0); + + case F_INVAL: + + if ((rw == S_EXEC) && !(sxd->sxd_prot & PROT_EXEC)) + return (FC_NOMAP); + + /* + * all xmem pages should already be mapped - desired mapping + * unknown + */ + + panic("xmem page fault"); + /*NOTREACHED*/ + + case F_PROT: + /* + * We can get away with this because ISM segments are + * always rw. Other than this unusual case, there + * should be no instances of protection violations. + */ + return (0); + + default: + XMEMPRINTF(8, ("segxmem_fault: type %x\n", type)); + return (FC_NOMAP); + } +} + +static int +segxmem_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) +{ + struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; + + ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock)); + + if (seg->s_base == addr && seg->s_size == len) { + sxd->sxd_prot = prot; + hat_chgprot(seg->s_as->a_hat, addr, len, prot); + } else { + return (IE_NOMEM); + } + return (0); +} + +/*ARGSUSED*/ +static int +segxmem_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) +{ + struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; + + ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock)); + + /* + * Need not acquire the segment lock since + * "sxd_prot" is a read-only field. + */ + return (((sxd->sxd_prot & prot) != prot) ? 
EACCES : 0); +} + +static int +segxmem_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv) +{ + struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; + size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1; + + ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); + + if (pgno != 0) { + do + protv[--pgno] = sxd->sxd_prot; + while (pgno != 0); + } + return (0); +} + +static u_offset_t +segxmem_getoffset(struct seg *seg, caddr_t addr) +{ + register struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; + + ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock)); + + return ((u_offset_t)sxd->sxd_offset + (addr - seg->s_base)); +} + +/*ARGSUSED*/ +static int +segxmem_gettype(struct seg *seg, caddr_t addr) +{ + ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock)); + + return (MAP_SHARED); +} + +/*ARGSUSED*/ +static int +segxmem_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp) +{ + register struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; + + ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock)); + + *vpp = sxd->sxd_vp; + return (0); +} + +#ifndef lint /* currently unused */ +/* + * Check to see if it makes sense to do kluster/read ahead to + * addr + delta relative to the mapping at addr. We assume here + * that delta is a signed PAGESIZE'd multiple (which can be negative). + * + * For segxmem we always "approve" of this action from our standpoint. 
+ */ +/*ARGSUSED*/ +static int +segxmem_kluster(struct seg *seg, caddr_t addr, ssize_t delta) +{ + return (0); +} + +static void +segxmem_badop() +{ + panic("segxmem_badop"); + /*NOTREACHED*/ +} + +#endif + +/* + * Special public segxmem operations + */ + + +void +segxmem_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw) +{ + page_t *pp; + struct segxmem_data *sxd = (struct segxmem_data *)(seg->s_data); + struct vnode *vp = sxd->sxd_vp; + u_offset_t off = sxd->sxd_offset; + caddr_t eaddr; + + ASSERT(seg->s_as == &kas); + + panic("segxmem_pageunlock"); + + eaddr = addr + len; + addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); + + for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) { + hat_unlock(kas.a_hat, addr, PAGESIZE); + + /* + * Use page_find() instead of page_lookup() to + * find the page since we know that it has + * "exclusive" lock. + */ + pp = page_find(vp, off); + if (pp == NULL) + panic("segxmem_pageunlock"); + if (rw == S_WRITE) { + hat_setrefmod(pp); + } else if (rw != S_OTHER) { + hat_setref(pp); + } + + page_unlock(pp); + } +} + +/* + * segxmem_getmap allocates from the map an address range to map the vnode vp + * in the range <off, off + len). + * + * If pagecreate is nonzero, segxmem_getmap will create the page(s). + * calls hat_memload_array to load the translations. + * **ppa can be NULL if pagecreate is 0. 
+ */ +caddr_t +segxmem_getmap(struct map *map, struct vnode *vp, u_offset_t off, size_t len, + page_t **ppa, enum seg_rw rw) +{ + caddr_t baseaddr; + uint_t attr = (rw == S_WRITE)?PROT_WRITE|PROT_READ:PROT_READ; + +#ifdef lint + vp = vp; + off = off; +#endif + + segxmemcnt.sx_getmapflt.value.ul++; + + baseaddr = (caddr_t)rmalloc_wait(map, len); + + hat_memload_array(kas.a_hat, baseaddr, len, ppa, attr | HAT_NOSYNC, + HAT_LOAD); + + return (baseaddr); +} + +void +segxmem_release(struct map *map, caddr_t addr, size_t len) +{ + + hat_unload(kas.a_hat, addr, len, HAT_UNLOAD_NOSYNC); + rmfree(map, len, (ulong_t)addr); +} + +int +segxmem_remap(struct seg *seg, struct vnode *vp, caddr_t addr, size_t len, + page_t ***ppa, uchar_t prot) +{ + struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; + uint_t blocknumber, lastblock, flags; + caddr_t taddr; + size_t tlen; + + ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); + + if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size || + (seg->s_ops != &segxmem_ops) || (sxd->sxd_vp != vp)) + return (1); /* Fail */ + + ASSERT(sxd->sxd_prot == prot); /* remove this later */ + + /* aligned addr and length */ + + blocknumber = (addr - seg->s_base) >> sxd->sxd_bshift; + lastblock = (addr + len - 1 - seg->s_base) >> sxd->sxd_bshift; + taddr = addr; + tlen = sxd->sxd_bsize; + while (blocknumber <= lastblock) { + + /* + * entire xmem segment mapped on mmap() call - if in the + * segment range(checked above), there should be a mapping + * therefore flags always HAT_LOAD_REMAP. + * + */ + if (hat_getpfnum(seg->s_as->a_hat, taddr) != PFN_INVALID) { +#ifdef DEBUG + if (remap_broken) + hat_unload(seg->s_as->a_hat, taddr, + tlen, HAT_UNLOAD); +#endif + + /* + * assume the hat would leave mapping HAT_LOAD_LOCK'ed + * on REMAP. 
+ */ + flags = HAT_LOAD | HAT_LOAD_NOCONSIST | HAT_LOAD_REMAP; + } else { + XMEMPRINTF(4, + ("segxmem_remap: taddr %p pfn inv\n", + (void *)taddr)); + flags = HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST; + } + + prot |= HAT_NOSYNC; + + if (btop(sxd->sxd_bsize) == 1) + hat_memload_array(seg->s_as->a_hat, taddr, tlen, + (page_t **)ppa, prot, flags); + else + hat_memload_array(seg->s_as->a_hat, taddr, tlen, *ppa, + prot, flags); + + blocknumber++; + ppa++; + taddr += tlen; + } + return (0); +} + +/* ARGSUSED */ +static int +segxmem_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags) +{ + ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); + + return (0); +} + +/* + * segxmem pages are always "in core" since the memory is locked down. + */ +/* ARGSUSED */ +static size_t +segxmem_incore(struct seg *seg, caddr_t addr, size_t len, char *vec) +{ + + caddr_t eo_seg; + + ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); +#ifdef lint + seg = seg; +#endif + + eo_seg = addr + len; + while (addr < eo_seg) { + /* page exist, and it's locked. */ + *vec++ = (char)0x9; + addr += PAGESIZE; + } + return (len); +} + +static int segxmem_advise(struct seg *seg, caddr_t addr, size_t len, + uint_t behav) +{ +#ifdef lint + seg = seg; + addr = addr; + len = len; + behav = behav; +#endif + return (0); +} + +/* + * called from as_ctl(, MC_LOCK,) + * + */ +/* ARGSUSED */ +static int +segxmem_lockop(struct seg *seg, caddr_t addr, size_t len, int attr, + int op, ulong_t *lockmap, size_t pos) +{ + ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); + /* + * for spt, as->a_paglck is never set + * so this routine should not be called. 
+ */ + return (0); +} + +static int +segxmem_unmap(struct seg *seg, caddr_t addr, size_t ssize) +{ + struct segxmem_data *sxd, *nsxd; + struct seg *nseg; + caddr_t segend, delsegend; + + XMEMPRINTF(1, ("segxmem_unmap: seg %p addr %p size %lx\n", + (void *)seg, (void *)addr, ssize)); + + ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); + + hat_unload(seg->s_as->a_hat, addr, ssize, HAT_UNLOAD_UNLOCK); + if (addr == seg->s_base && ssize == seg->s_size) { + seg_free(seg); + return (0); + } + sxd = (struct segxmem_data *)seg->s_data; + + /* partial unmap of the segment - begin, end and middle */ + + /* check for deleting at the beginning */ + + if (addr == seg->s_base) { + seg->s_base += ssize; + seg->s_size -= ssize; + return (0); + } + delsegend = addr + ssize; + segend = seg->s_base + seg->s_size; + + /* check for deleting at the end */ + if (delsegend == segend) { + seg->s_size -= ssize; + return (0); + } + + /* Now for the tough one. Make a new one at end and cut the current */ + + seg->s_size = addr - seg->s_base; /* adjust original segment */ + + nseg = seg_alloc(seg->s_as, delsegend, segend - delsegend); + if (nseg == NULL) + panic("segxmem seg_alloc"); + + nsxd = kmem_zalloc(sizeof (struct segxmem_data), KM_SLEEP); + + nsxd->sxd_vp = sxd->sxd_vp; + nsxd->sxd_offset = sxd->sxd_offset; /* unused */ + nsxd->sxd_bsize = sxd->sxd_bsize; + nsxd->sxd_bshift = sxd->sxd_bshift; + nsxd->sxd_prot = sxd->sxd_prot; + nsxd->sxd_softlockcnt = sxd->sxd_softlockcnt; /* ### */ + + nseg->s_ops = &segxmem_ops; + nseg->s_data = (void *)nsxd; + + return (0); +} + +/* + * Dump the pages belonging to this segxmem segment. 
+ */ +static void +segxmem_dump(struct seg *seg) +{ + struct segxmem_data *sxd; + caddr_t addr; + int i, j; + uint_t nblocks; + pgcnt_t npages; + + sxd = (struct segxmem_data *)seg->s_data; + nblocks = howmany(seg->s_size, sxd->sxd_bsize); + npages = nblocks << (sxd->sxd_bshift - PAGESHIFT); + addr = seg->s_base; + + /* XXX figure out if we need something else here */ + for (i = 0; i < nblocks; i++) { + pfn_t pfn = hat_getpfnum(seg->s_as->a_hat, addr); + + for (j = 0; j < npages; j++) { + dump_addpage(seg->s_as, addr, pfn); + pfn++; + addr += PAGESIZE; + } + } +} +/*ARGSUSED*/ +static int +segxmem_setpgsz(struct seg *seg, caddr_t addr, size_t len, uint_t szc) +{ + return (ENOTSUP); +} + +static int +segxmem_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp) +{ + struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; + + memidp->val[0] = (uintptr_t)sxd->sxd_vp; + memidp->val[1] = sxd->sxd_offset + (uintptr_t)(addr - seg->s_base); + return (0); +} + +/*ARGSUSED*/ +static int +segxmem_pagelock(struct seg *seg, caddr_t addr, size_t len, + struct page ***ppp, enum lock_type type, enum seg_rw rw) +{ + return (ENOTSUP); +} + +#define XMEMBUFSZ 16384 +#define XMEMPAD 128 /* larger than max len xmem string */ + +char xmembuf[XMEMBUFSZ + XMEMPAD]; +uint_t xmembufi; +int xmemlevel = 4; + +void +xmemprintf(const char *fmt, ...) +{ + va_list args; + int len; + char localbuf[XMEMPAD]; + uint_t newval, oldxmembufi; + + va_start(args, fmt); + + len = snprintf(localbuf, INT_MAX, "%d: ", (int)CPU->cpu_id); + len += vsnprintf(localbuf + len, INT_MAX, fmt, args); + + ASSERT(len < XMEMPAD); + + do { + oldxmembufi = xmembufi; + newval = oldxmembufi + len; + if (newval > XMEMBUFSZ) + newval = 0; + } while (cas32(&xmembufi, oldxmembufi, newval) != oldxmembufi); + + bcopy(localbuf, xmembuf + oldxmembufi, len); + + va_end(args); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deleted_files/usr/src/uts/intel/fs/xmemfs/xmem_dir.c Thu Jan 18 16:23:02 2007 -0800 @@ -0,0 +1,1025 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/sysmacros.h> +#include <sys/systm.h> +#include <sys/time.h> +#include <sys/vfs.h> +#include <sys/vnode.h> +#include <sys/errno.h> +#include <sys/cmn_err.h> +#include <sys/cred.h> +#include <sys/stat.h> +#include <sys/debug.h> +#include <sys/policy.h> +#include <sys/fs/xmem.h> + +static int xdircheckpath(struct xmemnode *, struct xmemnode *, struct cred *); +static int xdirrename(struct xmemnode *, struct xmemnode *, struct xmemnode *, + char *, struct xmemnode *, struct xdirent *, struct cred *); +static void xdirfixdotdot(struct xmemnode *, struct xmemnode *, + struct xmemnode *); +static int xdirmakexnode(struct xmemnode *, struct xmount *, + struct vattr *, enum de_op, struct xmemnode **, struct cred *); +static int xdiraddentry(struct xmemnode *, struct xmemnode *, char *, + enum de_op, struct xmemnode *); + + +#define X_HASH_SIZE 8192 /* must be power of 2 */ +#define X_MUTEX_SIZE 64 + +static struct xdirent *x_hashtable[X_HASH_SIZE]; +static kmutex_t x_hashmutex[X_MUTEX_SIZE]; + +#define X_HASH_INDEX(a) ((a) & (X_HASH_SIZE-1)) +#define X_MUTEX_INDEX(a) ((a) & (X_MUTEX_SIZE-1)) + +#define XMEMFS_HASH(xp, name, hash) \ + { \ + char Xc, *Xcp; \ + hash = ((uintptr_t)(xp)) >> 8; \ + for (Xcp = (name); (Xc = *Xcp) != 0; Xcp++) \ + hash = (hash << 4) + hash + (uint_t)Xc; \ + } + +void +xmemfs_hash_init(void) +{ + int ix; + + for (ix = 0; ix < X_MUTEX_SIZE; ix++) + mutex_init(&x_hashmutex[ix], NULL, MUTEX_DEFAULT, NULL); +} + +/* + * This routine is where the rubber meets the road for identities. 
+ */ +static void +xmemfs_hash_in(struct xdirent *x) +{ + uint_t hash; + struct xdirent **prevpp; + kmutex_t *t_hmtx; + + XMEMFS_HASH(x->xd_parent, x->xd_name, hash); + x->xd_hash = hash; + prevpp = &x_hashtable[X_HASH_INDEX(hash)]; + t_hmtx = &x_hashmutex[X_MUTEX_INDEX(hash)]; + mutex_enter(t_hmtx); + x->xd_link = *prevpp; + *prevpp = x; + mutex_exit(t_hmtx); +} + +/* + * Remove xdirent *t from the hash list. + */ +static void +xmemfs_hash_out(struct xdirent *x) +{ + uint_t hash; + struct xdirent **prevpp; + kmutex_t *t_hmtx; + + hash = x->xd_hash; + prevpp = &x_hashtable[X_HASH_INDEX(hash)]; + t_hmtx = &x_hashmutex[X_MUTEX_INDEX(hash)]; + mutex_enter(t_hmtx); + while (*prevpp != x) + prevpp = &(*prevpp)->xd_link; + *prevpp = x->xd_link; + mutex_exit(t_hmtx); +} + +static struct xdirent * +xmemfs_hash_lookup(char *name, struct xmemnode *parent, uint_t hold, + struct xmemnode **found) +{ + struct xdirent *l; + uint_t hash; + kmutex_t *t_hmtx; + struct xmemnode *xp; + + XMEMFS_HASH(parent, name, hash); + t_hmtx = &x_hashmutex[X_MUTEX_INDEX(hash)]; + mutex_enter(t_hmtx); + l = x_hashtable[X_HASH_INDEX(hash)]; + while (l) { + if ((l->xd_hash == hash) && + (l->xd_parent == parent) && + (strcmp(l->xd_name, name) == 0)) { + /* + * We need to make sure that the xmemnode that + * we put a hold on is the same one that we pass back. + * Hence, temporary variable xp is necessary. + * The right way to fix this would be to add the t_hmtx + * lock acquisition to callers like tdirrename, so + * that this race condition doesn't occur. But + * this "fix" is simpler, and less of a performance + * impact. + */ + xp = l->xd_xmemnode; + if (hold) { + ASSERT(xp); + xmemnode_hold(xp); + } + if (found) + *found = xp; + mutex_exit(t_hmtx); + return (l); + } else { + l = l->xd_link; + } + } + mutex_exit(t_hmtx); + return (NULL); +} + +/* + * Search directory 'parent' for entry 'name'. 
+ * + * The calling thread can't hold the write version + * of the rwlock for the directory being searched + * + * 0 is returned on success and *foundxp points + * to the found xmemnode with its vnode held. + */ +int +xdirlookup( + struct xmemnode *parent, + char *name, + struct xmemnode **foundxp, + struct cred *cred) +{ + int error; + + *foundxp = NULL; + if (parent->xn_type != VDIR) + return (ENOTDIR); + + if ((error = xmem_xaccess(parent, VEXEC, cred))) + return (error); + + if (*name == '\0') { + xmemnode_hold(parent); + *foundxp = parent; + return (0); + } + + /* + * Search the directory for the matching name + * We need the lock protecting the xn_dir list + * so that it doesn't change out from underneath us. + * xmemfs_hash_lookup() will pass back the xmemnode + * with a hold on it. + */ + + if (xmemfs_hash_lookup(name, parent, 1, foundxp) != NULL) { + ASSERT(*foundxp); + return (0); + } + + return (ENOENT); +} + +/* + * Enter a directory entry for 'name' and 'xp' into directory 'dir' + * + * Returns 0 on success. + */ +int +xdirenter( + struct xmount *xm, + struct xmemnode *dir, /* target directory to make entry in */ + char *name, /* name of entry */ + enum de_op op, /* entry operation */ + struct xmemnode *fromparent, /* source directory if rename */ + struct xmemnode *xp, /* source xmemnode, if link/rename */ + struct vattr *va, + struct xmemnode **xpp, /* return xmemnode, if create/mkdir */ + struct cred *cred) +{ + struct xdirent *xdp; + struct xmemnode *found = NULL; + int error = 0; + char *s; + + /* + * xn_rwlock is held to serialize direnter and dirdeletes + */ + ASSERT(RW_WRITE_HELD(&dir->xn_rwlock)); + ASSERT(dir->xn_type == VDIR); + + /* + * Don't allow '/' characters in pathname component + * (thus in ufs_direnter()). + */ + for (s = name; *s; s++) + if (*s == '/') + return (EACCES); + + ASSERT(name[0] != '\0'); + + /* + * For link and rename lock the source entry and check the link count + * to see if it has been removed while it was unlocked. 
+ */ + if (op == DE_LINK || op == DE_RENAME) { + mutex_enter(&xp->xn_tlock); + if (xp->xn_nlink == 0) { + mutex_exit(&xp->xn_tlock); + return (ENOENT); + } + + if (xp->xn_nlink == MAXLINK) { + mutex_exit(&xp->xn_tlock); + return (EMLINK); + } + xp->xn_nlink++; + mutex_exit(&xp->xn_tlock); + gethrestime(&xp->xn_ctime); + } + + /* + * This might be a "dangling detached directory". + * it could have been removed, but a reference + * to it kept in u_cwd. don't bother searching + * it, and with any luck the user will get tired + * of dealing with us and cd to some absolute + * pathway. *sigh*, thus in ufs, too. + */ + if (dir->xn_nlink == 0) { + error = ENOENT; + goto out; + } + + /* + * If this is a rename of a directory and the parent is + * different (".." must be changed), then the source + * directory must not be in the directory hierarchy + * above the target, as this would orphan everything + * below the source directory. + */ + if (op == DE_RENAME) { + if (xp == dir) { + error = EINVAL; + goto out; + } + if (xp->xn_type == VDIR) { + if ((fromparent != dir) && + (error = xdircheckpath(xp, dir, cred))) { + goto out; + } + } + } + + /* + * Search for the entry. Return "found" if it exists. + */ + xdp = xmemfs_hash_lookup(name, dir, 1, &found); + + if (xdp) { + ASSERT(found); + switch (op) { + case DE_CREATE: + case DE_MKDIR: + if (xpp) { + *xpp = found; + error = EEXIST; + } else { + xmemnode_rele(found); + } + break; + + case DE_RENAME: + error = xdirrename(fromparent, xp, + dir, name, found, xdp, cred); + xmemnode_rele(found); + break; + + case DE_LINK: + /* + * Can't link to an existing file. + */ + error = EEXIST; + xmemnode_rele(found); + break; + } + } else { + + /* + * The entry does not exist. Check write permission in + * directory to see if entry can be created. + */ + if (error = xmem_xaccess(dir, VWRITE, cred)) + goto out; + if (op == DE_CREATE || op == DE_MKDIR) { + /* + * Make new xmemnode and directory entry as required. 
+ */ + error = xdirmakexnode(dir, xm, va, op, &xp, cred); + if (error) + goto out; + } + if (error = xdiraddentry(dir, xp, name, op, fromparent)) { + if (op == DE_CREATE || op == DE_MKDIR) { + /* + * Unmake the inode we just made. + */ + rw_enter(&xp->xn_rwlock, RW_WRITER); + if ((xp->xn_type) == VDIR) { + ASSERT(xdp == NULL); + /* + * cleanup allocs made by xdirinit() + */ + xdirtrunc(xp); + } + mutex_enter(&xp->xn_tlock); + xp->xn_nlink = 0; + mutex_exit(&xp->xn_tlock); + gethrestime(&xp->xn_ctime); + rw_exit(&xp->xn_rwlock); + xmemnode_rele(xp); + xp = NULL; + } + } else if (xpp) { + *xpp = xp; + } else if (op == DE_CREATE || op == DE_MKDIR) { + xmemnode_rele(xp); + } + } +out: + if (error && (op == DE_LINK || op == DE_RENAME)) { + /* + * Undo bumped link count. + */ + DECR_COUNT(&xp->xn_nlink, &xp->xn_tlock); + gethrestime(&xp->xn_ctime); + } + return (error); +} + +/* + * Delete entry xp of name "nm" from dir. + * Free dir entry space and decrement link count on xmemnode(s). + * + * Return 0 on success. + */ +int +xdirdelete( + struct xmemnode *dir, + struct xmemnode *xp, + char *nm, + enum dr_op op, + struct cred *cred) +{ + register struct xdirent *tpdp; + int error; + size_t namelen; + struct xmemnode *xptmp; + timestruc_t now; + + ASSERT(RW_WRITE_HELD(&dir->xn_rwlock)); + ASSERT(RW_WRITE_HELD(&xp->xn_rwlock)); + ASSERT(dir->xn_type == VDIR); + + ASSERT(nm[0] != '\0'); + + /* + * return error when removing . and .. + */ + if (nm[0] == '.') { + if (nm[1] == '\0') + return (EINVAL); + if (nm[1] == '.' && nm[2] == '\0') + return (EEXIST); /* thus in ufs */ + } + + if (error = xmem_xaccess(dir, VEXEC|VWRITE, cred)) + return (error); + + /* + * If the parent directory is "sticky", then the user must + * own the parent directory or the file in it, or else must + * have permission to write the file. Otherwise it may not + * be deleted (except by privileged users). Same as ufs_dirremove. 
+ */ + if (error = xmem_sticky_remove_access(dir, xp, cred)) + return (error); + + if (dir->xn_dir == NULL) + return (ENOENT); + + tpdp = xmemfs_hash_lookup(nm, dir, 0, &xptmp); + if (tpdp == NULL) { + /* + * If it is gone, some other thread got here first! + * Return error ENOENT. + */ + return (ENOENT); + } + + /* + * If the xmemnode in the xdirent changed, we were probably + * the victim of a concurrent rename operation. The original + * is gone, so return that status (same as UFS). + */ + if (xp != xptmp) + return (ENOENT); + + xmemfs_hash_out(tpdp); + + /* + * Take tpdp out of the directory list. + */ + ASSERT(tpdp->xd_next != tpdp); + ASSERT(tpdp->xd_prev != tpdp); + if (tpdp->xd_prev) { + tpdp->xd_prev->xd_next = tpdp->xd_next; + } + if (tpdp->xd_next) { + tpdp->xd_next->xd_prev = tpdp->xd_prev; + } + + /* + * If the roving slot pointer happens to match tpdp, + * point it at the previous dirent. + */ + if (dir->xn_dir->xd_prev == tpdp) { + dir->xn_dir->xd_prev = tpdp->xd_prev; + } + ASSERT(tpdp->xd_next != tpdp); + ASSERT(tpdp->xd_prev != tpdp); + + /* + * tpdp points to the correct directory entry + */ + namelen = strlen(tpdp->xd_name) + 1; + + xmem_memfree(tpdp, sizeof (struct xdirent) + namelen); + dir->xn_size -= (sizeof (struct xdirent) + namelen); + dir->xn_dirents--; + + gethrestime(&now); + dir->xn_mtime = now; + dir->xn_ctime = now; + xp->xn_ctime = now; + + ASSERT(xp->xn_nlink > 0); + DECR_COUNT(&xp->xn_nlink, &xp->xn_tlock); + if (op == DR_RMDIR && xp->xn_type == VDIR) { + xdirtrunc(xp); + ASSERT(xp->xn_nlink == 0); + } + return (0); +} + +/* + * xdirinit is used internally to initialize a directory (dir) + * with '.' and '..' 
entries without checking permissions and locking + */ +void +xdirinit( + struct xmemnode *parent, /* parent of directory to initialize */ + struct xmemnode *dir) /* the new directory */ +{ + struct xdirent *dot, *dotdot; + timestruc_t now; + + ASSERT(RW_WRITE_HELD(&parent->xn_rwlock)); + ASSERT(dir->xn_type == VDIR); + + dot = xmem_memalloc(sizeof (struct xdirent) + 2, 1); + dotdot = xmem_memalloc(sizeof (struct xdirent) + 3, 1); + + /* + * Initialize the entries + */ + dot->xd_xmemnode = dir; + dot->xd_offset = 0; + dot->xd_name = (char *)dot + sizeof (struct xdirent); + dot->xd_name[0] = '.'; + dot->xd_parent = dir; + xmemfs_hash_in(dot); + + dotdot->xd_xmemnode = parent; + dotdot->xd_offset = 1; + dotdot->xd_name = (char *)dotdot + sizeof (struct xdirent); + dotdot->xd_name[0] = '.'; + dotdot->xd_name[1] = '.'; + dotdot->xd_parent = dir; + xmemfs_hash_in(dotdot); + + /* + * Initialize directory entry list. + */ + dot->xd_next = dotdot; + dot->xd_prev = dotdot; /* dot's xd_prev holds roving slot pointer */ + dotdot->xd_next = NULL; + dotdot->xd_prev = dot; + INCR_COUNT(&parent->xn_nlink, &parent->xn_tlock); + + dir->xn_dir = dot; + dir->xn_size = 2 * sizeof (struct xdirent) + 5; /* dot and dotdot */ + dir->xn_dirents = 2; + dir->xn_nlink = 2; /* one for daddy, and one just for being me */ + + gethrestime(&now); + dir->xn_mtime = now; + dir->xn_ctime = now; + parent->xn_ctime = now; +} + +/* + * xdirtrunc is called to remove all directory entries under this directory. + * The files themselves are removed elsewhere. 
+ */ +void +xdirtrunc(struct xmemnode *dir) +{ + register struct xdirent *xdp; + size_t namelen; + timestruc_t now; + + ASSERT(RW_WRITE_HELD(&dir->xn_rwlock)); + ASSERT(dir->xn_type == VDIR); + + for (xdp = dir->xn_dir; xdp; xdp = dir->xn_dir) { + ASSERT(xdp->xd_next != xdp); + ASSERT(xdp->xd_prev != xdp); + ASSERT(xdp->xd_xmemnode); + ASSERT(xdp->xd_xmemnode->xn_nlink > 0); + + dir->xn_dir = xdp->xd_next; + namelen = strlen(xdp->xd_name) + 1; + + DECR_COUNT(&xdp->xd_xmemnode->xn_nlink, + &xdp->xd_xmemnode->xn_tlock); + + xmemfs_hash_out(xdp); + + xmem_memfree(xdp, sizeof (struct xdirent) + namelen); + dir->xn_size -= (sizeof (struct xdirent) + namelen); + dir->xn_dirents--; + } + + gethrestime(&now); + dir->xn_mtime = now; + dir->xn_ctime = now; + + ASSERT(dir->xn_dir == NULL); + ASSERT(dir->xn_size == 0); + ASSERT(dir->xn_dirents == 0); +} + +/* + * Check if the source directory is in the path of the target directory. + * The target directory is locked by the caller. + */ +static int +xdircheckpath( + struct xmemnode *fromxp, + struct xmemnode *toparent, + struct cred *cred) +{ + int error = 0; + struct xmemnode *dir, *dotdot; + struct xdirent *xdp; + + ASSERT(RW_WRITE_HELD(&toparent->xn_rwlock)); + + xdp = xmemfs_hash_lookup("..", toparent, 1, &dotdot); + if (xdp == NULL) + return (ENOENT); + + ASSERT(dotdot); + + if (dotdot == toparent) { + /* root of fs. search trivially satisfied. */ + xmemnode_rele(dotdot); + return (0); + } + for (;;) { + /* + * Return error for cases like "mv c c/d", + * "mv c c/d/e" and so on. + */ + if (dotdot == fromxp) { + xmemnode_rele(dotdot); + error = EINVAL; + break; + } + dir = dotdot; + error = xdirlookup(dir, "..", &dotdot, cred); + if (error) { + xmemnode_rele(dir); + break; + } + /* + * We're okay if we traverse the directory tree up to + * the root directory and don't run into the + * parent directory. 
+ */ + if (dir == dotdot) { + xmemnode_rele(dir); + xmemnode_rele(dotdot); + break; + } + xmemnode_rele(dir); + } + return (error); +} + +static int +xdirrename( + struct xmemnode *fromparent, /* parent directory of source */ + struct xmemnode *fromxp, /* source xmemnode */ + struct xmemnode *toparent, /* parent directory of target */ + char *nm, /* entry we are trying to change */ + struct xmemnode *to, /* target xmemnode */ + struct xdirent *where, /* target xmemnode directory entry */ + struct cred *cred) /* credentials */ +{ + int error = 0; + int doingdirectory; + timestruc_t now; + +#if defined(lint) + nm = nm; +#endif + ASSERT(RW_WRITE_HELD(&toparent->xn_rwlock)); + + rw_enter(&fromxp->xn_rwlock, RW_READER); + rw_enter(&to->xn_rwlock, RW_READER); + + /* + * Check that everything is on the same filesystem. + */ + if (to->xn_vnode->v_vfsp != toparent->xn_vnode->v_vfsp || + to->xn_vnode->v_vfsp != fromxp->xn_vnode->v_vfsp) { + error = EXDEV; + goto out; + } + + /* + * Short circuit rename of something to itself. + */ + if (fromxp == to) { + error = ESAME; /* special KLUDGE error code */ + goto out; + } + + /* + * Must have write permission to rewrite target entry. + */ + if (error = xmem_xaccess(fromparent, VWRITE, cred)) + goto out; + + /* + * If the parent directory is "sticky", then the user must own + * either the parent directory or the destination of the rename, + * or else must have permission to write the destination. + * Otherwise the destination may not be changed (except by the + * privileged users). This implements append-only directories. + */ + if (error = xmem_sticky_remove_access(toparent, to, cred)) + goto out; + + /* + * Ensure source and target are compatible (both directories + * or both not directories). If target is a directory it must + * be empty and have no links to it; in addition it must not + * be a mount point, and both the source and target must be + * writable. 
+ */ + doingdirectory = (fromxp->xn_type == VDIR); + if (to->xn_type == VDIR) { + if (!doingdirectory) { + error = EISDIR; + goto out; + } + /* + * vn_vfswlock will prevent mounts from using the directory + * until we are done. + */ + if (vn_vfswlock(XNTOV(to))) { + error = EBUSY; + goto out; + } + if (vn_mountedvfs(XNTOV(to)) != NULL) { + vn_vfsunlock(XNTOV(to)); + error = EBUSY; + goto out; + } + + mutex_enter(&to->xn_tlock); + if (to->xn_dirents > 2 || to->xn_nlink > 2) { + mutex_exit(&to->xn_tlock); + vn_vfsunlock(XNTOV(to)); + error = EEXIST; /* SIGH should be ENOTEMPTY */ + /* + * Update atime because checking xn_dirents is + * logically equivalent to reading the directory + */ + gethrestime(&to->xn_atime); + goto out; + } + mutex_exit(&to->xn_tlock); + } else if (doingdirectory) { + error = ENOTDIR; + goto out; + } + + where->xd_xmemnode = fromxp; + gethrestime(&now); + toparent->xn_mtime = now; + toparent->xn_ctime = now; + + /* + * Upgrade to write lock on "to" (i.e., the target xmemnode). + */ + rw_exit(&to->xn_rwlock); + rw_enter(&to->xn_rwlock, RW_WRITER); + + /* + * Decrement the link count of the target xmemnode. + */ + DECR_COUNT(&to->xn_nlink, &to->xn_tlock); + to->xn_ctime = now; + + if (doingdirectory) { + /* + * The entry for "to" no longer exists so release the vfslock. + */ + vn_vfsunlock(XNTOV(to)); + + /* + * Decrement the target link count and delete all entires. + */ + xdirtrunc(to); + ASSERT(to->xn_nlink == 0); + + /* + * Renaming a directory with the parent different + * requires that ".." be rewritten. The window is + * still there for ".." to be inconsistent, but this + * is unavoidable, and a lot shorter than when it was + * done in a user process. 
+ */ + if (fromparent != toparent) + xdirfixdotdot(fromxp, fromparent, toparent); + } +out: + rw_exit(&to->xn_rwlock); + rw_exit(&fromxp->xn_rwlock); + return (error); +} + +static void +xdirfixdotdot( + struct xmemnode *fromxp, /* child directory */ + struct xmemnode *fromparent, /* old parent directory */ + struct xmemnode *toparent) /* new parent directory */ +{ + struct xdirent *dotdot; + + ASSERT(RW_LOCK_HELD(&toparent->xn_rwlock)); + + /* + * Increment the link count in the new parent xmemnode + */ + INCR_COUNT(&toparent->xn_nlink, &toparent->xn_tlock); + gethrestime(&toparent->xn_ctime); + + dotdot = xmemfs_hash_lookup("..", fromxp, 0, NULL); + + ASSERT(dotdot->xd_xmemnode == fromparent); + dotdot->xd_xmemnode = toparent; + + /* + * Decrement the link count of the old parent xmemnode. + * If fromparent is NULL, then this is a new directory link; + * it has no parent, so we need not do anything. + */ + if (fromparent != NULL) { + mutex_enter(&fromparent->xn_tlock); + if (fromparent->xn_nlink != 0) { + fromparent->xn_nlink--; + gethrestime(&fromparent->xn_ctime); + } + mutex_exit(&fromparent->xn_tlock); + } +} + +static int +xdiraddentry( + struct xmemnode *dir, /* target directory to make entry in */ + struct xmemnode *xp, /* new xmemnode */ + char *name, + enum de_op op, + struct xmemnode *fromxp) +{ + struct xdirent *xdp, *tpdp; + size_t namelen, alloc_size; + timestruc_t now; + + /* + * Make sure the parent directory wasn't removed from + * underneath the caller. + */ + if (dir->xn_dir == NULL) + return (ENOENT); + + /* + * Check that everything is on the same filesystem. 
+ */ + if (xp->xn_vnode->v_vfsp != dir->xn_vnode->v_vfsp) + return (EXDEV); + + /* + * Allocate and initialize directory entry + */ + namelen = strlen(name) + 1; + alloc_size = namelen + sizeof (struct xdirent); + xdp = xmem_memalloc(alloc_size, 0); + if (xdp == NULL) + return (ENOSPC); + + if ((op == DE_RENAME) && (xp->xn_type == VDIR)) + xdirfixdotdot(xp, fromxp, dir); + + dir->xn_size += alloc_size; + dir->xn_dirents++; + xdp->xd_xmemnode = xp; + xdp->xd_parent = dir; + + /* + * The directory entry and its name were allocated sequentially. + */ + xdp->xd_name = (char *)xdp + sizeof (struct xdirent); + (void) strcpy(xdp->xd_name, name); + + xmemfs_hash_in(xdp); + + /* + * Some utilities expect the size of a directory to remain + * somewhat static. For example, a routine which unlinks + * files between calls to readdir(); the size of the + * directory changes from underneath it and so the real + * directory offset in bytes is invalid. To circumvent + * this problem, we initialize a directory entry with an + * phony offset, and use this offset to determine end of + * file in xmem_readdir. + */ + tpdp = dir->xn_dir->xd_prev; + /* + * Install at first empty "slot" in directory list. + */ + while (tpdp->xd_next != NULL && (tpdp->xd_next->xd_offset - + tpdp->xd_offset) <= 1) { + ASSERT(tpdp->xd_next != tpdp); + ASSERT(tpdp->xd_prev != tpdp); + ASSERT(tpdp->xd_next->xd_offset > tpdp->xd_offset); + tpdp = tpdp->xd_next; + } + xdp->xd_offset = tpdp->xd_offset + 1; + + /* + * If we're at the end of the dirent list and the offset (which + * is necessarily the largest offset in this directory) is more + * than twice the number of dirents, that means the directory is + * 50% holes. At this point we reset the slot pointer back to + * the beginning of the directory so we start using the holes. 
+ * The idea is that if there are N dirents, there must also be + * N holes, so we can satisfy the next N creates by walking at + * most 2N entries; thus the average cost of a create is constant. + * Note that we use the first dirent's xd_prev as the roving + * slot pointer; it's ugly, but it saves a word in every dirent. + */ + if (tpdp->xd_next == NULL && tpdp->xd_offset > 2 * dir->xn_dirents) + dir->xn_dir->xd_prev = dir->xn_dir->xd_next; + else + dir->xn_dir->xd_prev = xdp; + + ASSERT(tpdp->xd_next != tpdp); + ASSERT(tpdp->xd_prev != tpdp); + + xdp->xd_next = tpdp->xd_next; + if (xdp->xd_next) { + xdp->xd_next->xd_prev = xdp; + } + xdp->xd_prev = tpdp; + tpdp->xd_next = xdp; + + ASSERT(xdp->xd_next != xdp); + ASSERT(xdp->xd_prev != xdp); + ASSERT(tpdp->xd_next != tpdp); + ASSERT(tpdp->xd_prev != tpdp); + + gethrestime(&now); + dir->xn_mtime = now; + dir->xn_ctime = now; + + return (0); +} + +static int +xdirmakexnode( + struct xmemnode *dir, + struct xmount *xm, + struct vattr *va, + enum de_op op, + struct xmemnode **newnode, + struct cred *cred) +{ + struct xmemnode *xp; + enum vtype type; + + ASSERT(va != NULL); + ASSERT(op == DE_CREATE || op == DE_MKDIR); + if (((va->va_mask & AT_ATIME) && TIMESPEC_OVERFLOW(&va->va_atime)) || + ((va->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&va->va_mtime))) + return (EOVERFLOW); + type = va->va_type; + xp = xmem_memalloc(sizeof (struct xmemnode), 1); + xp->xn_vnode = vn_alloc(KM_SLEEP); + xmemnode_init(xm, xp, va, cred); + if (type == VBLK || type == VCHR) { + xp->xn_vnode->v_rdev = xp->xn_rdev = va->va_rdev; + } else { + xp->xn_vnode->v_rdev = xp->xn_rdev = NODEV; + } + xp->xn_vnode->v_type = type; + xp->xn_uid = crgetuid(cred); + + /* + * To determine the group-id of the created file: + * 1) If the gid is set in the attribute list (non-Sun & pre-4.0 + * clients are not likely to set the gid), then use it if + * the process is privileged, belongs to the target group, + * or the group is the same as the parent directory. 
+ * 2) If the filesystem was not mounted with the Old-BSD-compatible + * GRPID option, and the directory's set-gid bit is clear, + * then use the process's gid. + * 3) Otherwise, set the group-id to the gid of the parent directory. + */ + if ((va->va_mask & AT_GID) && + ((va->va_gid == dir->xn_gid) || groupmember(va->va_gid, cred) || + secpolicy_vnode_create_gid(cred) == 0)) { + xp->xn_gid = va->va_gid; + } else { + if (dir->xn_mode & VSGID) + xp->xn_gid = dir->xn_gid; + else + xp->xn_gid = crgetgid(cred); + } + /* + * If we're creating a directory, and the parent directory has the + * set-GID bit set, set it on the new directory. + * Otherwise, if the user is neither privileged nor a member of the + * file's new group, clear the file's set-GID bit. + */ + if (dir->xn_mode & VSGID && type == VDIR) + xp->xn_mode |= VSGID; + else if ((xp->xn_mode & VSGID) && + secpolicy_vnode_setids_setgids(cred, xp->xn_gid) != 0) + xp->xn_mode &= ~VSGID; + + if (va->va_mask & AT_ATIME) + xp->xn_atime = va->va_atime; + if (va->va_mask & AT_MTIME) + xp->xn_mtime = va->va_mtime; + + if (op == DE_MKDIR) + xdirinit(dir, xp); + + *newnode = xp; + return (0); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deleted_files/usr/src/uts/intel/fs/xmemfs/xmem_subr.c Thu Jan 18 16:23:02 2007 -0800 @@ -0,0 +1,566 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/errno.h> +#include <sys/param.h> +#include <sys/t_lock.h> +#include <sys/systm.h> +#include <sys/sysmacros.h> +#include <sys/debug.h> +#include <sys/time.h> +#include <sys/cmn_err.h> +#include <sys/vnode.h> +#include <sys/vfs.h> +#include <sys/cred.h> +#include <sys/kmem.h> +#include <sys/stat.h> +#include <sys/mode.h> +#include <vm/hat.h> +#include <vm/seg_map.h> +#include <vm/seg_kmem.h> +#include <vm/pvn.h> +#include <vm/page.h> +#include <sys/atomic.h> +#include <sys/policy.h> +#include <sys/fs/xmem.h> + + +extern void *xpgget(struct xmount *); +extern void xpgput(struct xmount *, void *); + +#define MODESHIFT 3 + +size_t xmemfs_maxkmem = 32768; +size_t xmemfs_kmemcnt; + +int +xmem_xaccess(void *vxp, int mode, struct cred *cred) +{ + struct xmemnode *xp = vxp; + int shift = 0; + /* + * Check access based on owner, group and + * public permissions in xmemnode. + */ + if (crgetuid(cred) != xp->xn_uid) { + shift += MODESHIFT; + if (groupmember(xp->xn_gid, cred) == 0) + shift += MODESHIFT; + } + + mode &= ~(xp->xn_mode << shift); + + if (mode == 0) + return (0); + + return (secpolicy_vnode_access(cred, XNTOV(xp), xp->xn_uid, mode)); +} + +/* + * Decide whether it is okay to remove within a sticky directory. + * Two conditions need to be met: write access to the directory + * is needed. In sticky directories, write access is not sufficient; + * you can remove entries from a directory only if you own the directory, + * if you are privileged, if you own the entry or if they entry is + * a plain file and you have write access to that file. + * Function returns 0 if remove access is granted. 
+ */ +int +xmem_sticky_remove_access(struct xmemnode *dir, struct xmemnode *entry, + struct cred *cr) +{ + uid_t uid; + + if ((dir->xn_mode & S_ISVTX) && + (uid = crgetuid(cr)) != dir->xn_uid && + uid != entry->xn_uid && + (entry->xn_type != VREG || + xmem_xaccess(entry, VWRITE, cr) != 0)) + return (secpolicy_vnode_remove(cr)); + return (0); +} + +/* + * Allocate zeroed memory if xmemfs_maxkmem has not been exceeded + * or the 'musthave' flag is set. 'musthave' allocations should + * always be subordinate to normal allocations so that xmemfs_maxkmem + * can't be exceeded by more than a few KB. Example: when creating + * a new directory, the xmemnode is a normal allocation; if that + * succeeds, the dirents for "." and ".." are 'musthave' allocations. + */ +void * +xmem_memalloc(size_t size, int musthave) +{ + void *ptr = NULL; + + if (musthave) { + atomic_add_long(&xmemfs_kmemcnt, size); + ptr = kmem_zalloc(size, KM_SLEEP); + } else if (xmemfs_kmemcnt + size < xmemfs_maxkmem) { + /* + * kmemcnt may have increased since above check so a little + * more than xmemfs_maxkmem may be allocated. 
+ */ + ptr = kmem_zalloc(size, KM_NOSLEEP); + if (ptr) + atomic_add_long(&xmemfs_kmemcnt, size); + } + return (ptr); +} + +void +xmem_memfree(void *cp, size_t size) +{ + extern size_t xmemfs_kmemcnt; + + kmem_free(cp, size); + atomic_add_long(&xmemfs_kmemcnt, -size); +} + +/* add to the number of pages we have created */ + +int +xmem_mem_add(struct xmount *xm, size_t size) +{ + mutex_enter(&xm->xm_contents); + + /* allocate the last available block */ + if ((xm->xm_mem + size) > xm->xm_max) { + mutex_exit(&xm->xm_contents); + return (1); + } + xm->xm_mem += size; + mutex_exit(&xm->xm_contents); + return (0); +} + +/* sub to the number of pages we have created */ + +static void +xmem_mem_sub(struct xmount *xm, size_t size) +{ + mutex_enter(&xm->xm_contents); + xm->xm_mem -= size; + mutex_exit(&xm->xm_contents); +} + +/* + * xmem_acquire_pages: returns an array of size btop(xm_bsize) page pointers + * or xm_bsize bytes. + * + * If large page, the array will contain 1024 entries (4MB) or 512 entries. + * + * If not large page, there is no array as a page_t * is returned. 
+ */ + +static page_t ** +xmem_acquire_pages(struct xmount *xm, struct vnode *vp, offset_t off) +{ + page_t **ppa, *pp, *pplist; + uint_t pindex; + size_t bsize; + struct seg tmpseg; + + bsize = xm->xm_bsize; + + if (xmem_mem_add(xm, 1)) + return (NULL); + + if (xm->xm_flags & XARGS_RESERVEMEM) { + + mutex_enter(&xm->xm_contents); + ppa = xpgget(xm); + mutex_exit(&xm->xm_contents); + + if (xm->xm_ppb == 1) { + /* ppa is a direct page pointer */ + + if (!page_hashin((page_t *)ppa, vp, off, NULL)) { + panic("xmem_acquire_pages: hashin failed" + " %p %llx", (void *)vp, off); + } + pindex = xm->xm_ppb; /* bypass for loop */ + } else { + pindex = 0; + } + + for (; pindex < xm->xm_ppb; pindex++, off += PAGESIZE) { + pp = ppa[pindex]; + if (!page_hashin(pp, vp, off, NULL)) { + panic("xmem_acquire_pages: hashin failed" + " %p %p %llx", (void *)pp, (void *)vp, off); + } + } + return (ppa); + } + bzero(&tmpseg, sizeof (struct seg)); + tmpseg.s_as = &kas; + + if ((freemem - xm->xm_ppb) < xmemfs_minfree || + page_resv(xm->xm_ppb, KM_NOSLEEP) == 0) { + + cmn_err(CE_WARN, "%s: File system full, no memory", + xm->xm_mntpath); + return (NULL); + } + + (void) page_create_wait(xm->xm_ppb, PG_WAIT); + + pplist = page_get_freelist(vp, off, &tmpseg, + (caddr_t)(uintptr_t)off, bsize, 0, NULL); + if (pplist == NULL && xm->xm_ppb == 1) { + pplist = page_get_cachelist(vp, off, &tmpseg, + (caddr_t)(uintptr_t)off, 0, NULL); + } + if (pplist == NULL) { + page_create_putback(xm->xm_ppb); + page_unresv(xm->xm_ppb); + return (NULL); + } + if (PP_ISAGED(pplist) == 0) { + ASSERT(xm->xm_ppb == 1); + page_hashout(pplist, NULL); + } + + if (xm->xm_ppb > 1) + ppa = kmem_alloc(sizeof (*ppa) * xm->xm_ppb, KM_SLEEP); + + for (pindex = 0; pindex < xm->xm_ppb; pindex++, off += PAGESIZE) { + pp = pplist; + page_sub(&pplist, pp); + ASSERT(PAGE_EXCL(pp)); + ASSERT(pp->p_vnode == NULL); + ASSERT(!hat_page_is_mapped(pp)); + PP_CLRFREE(pp); + PP_CLRAGED(pp); + + if (xm->xm_ppb == 1) + ppa = (page_t **)pp; + else 
+ ppa[pindex] = pp; + + if (!page_hashin(pp, vp, off, NULL)) { + panic("xmem_acquire_pages: hashin failed" + " %p %p %llx", (void *)pp, (void *)vp, off); + } + page_downgrade(pp); /* XXX */ + } + return (ppa); +} + +static void +xmem_release_pages(struct xmount *xm, page_t **ppa) +{ + uint_t pindex; + page_t *pp; + + xmem_mem_sub(xm, 1); + + if (xm->xm_flags & XARGS_RESERVEMEM) { + + /* + * if ppb == 1 and to lessen the load on kmem memory in + * having to allocate a million 4 byte pointers for a + * 4 GB file system, ppa is actually a page_t * + */ + + if (xm->xm_ppb == 1) { + page_hashout((page_t *)ppa, NULL); + pindex = xm->xm_ppb; /* bypass for loop */ + } else + pindex = 0; + + for (; pindex < xm->xm_ppb; pindex++) { + pp = ppa[pindex]; + page_hashout(pp, NULL); + } + mutex_enter(&xm->xm_contents); + xpgput(xm, ppa); + mutex_exit(&xm->xm_contents); + + } else { + int flag = B_INVAL; + + if (xm->xm_ppb == 1) { + VN_DISPOSE((page_t *)ppa, flag, 0, kcred); + } else { + + for (pindex = 0; pindex < xm->xm_ppb; pindex++) + VN_DISPOSE(ppa[pindex], flag, 0, kcred); + + kmem_free(ppa, sizeof (*ppa) * xm->xm_ppb); + } + page_unresv(xm->xm_ppb); + } +} + +/* + * Initialize a xmemnode and add it to file list under mount point. 
+ */ +void +xmemnode_init(struct xmount *xm, struct xmemnode *xp, + vattr_t *vap, cred_t *cred) +{ + struct vnode *vp; + timestruc_t now; + + ASSERT(vap != NULL); + ASSERT(cred != NULL); + + rw_init(&xp->xn_rwlock, NULL, RW_DEFAULT, NULL); + mutex_init(&xp->xn_tlock, NULL, MUTEX_DEFAULT, NULL); + xp->xn_mode = MAKEIMODE(vap->va_type, vap->va_mode); + + if (S_ISREG(xp->xn_mode)) + xp->xn_mode &= ~(S_IXUSR | S_IXGRP | S_IXOTH); + + xp->xn_mask = 0; + xp->xn_type = vap->va_type; + xp->xn_nodeid = (ino64_t)(uint32_t)((uintptr_t)xp >> 3); + xp->xn_nlink = 1; + xp->xn_size = 0; + xp->xn_uid = crgetuid(cred); + xp->xn_gid = crgetgid(cred); + + xp->xn_fsid = xm->xm_dev; + xp->xn_rdev = vap->va_rdev; + xp->xn_blksize = PAGESIZE; + xp->xn_nblocks = 0; + gethrestime(&now); + xp->xn_atime = now; + xp->xn_mtime = now; + xp->xn_ctime = now; + xp->xn_seq = 0; + xp->xn_dir = NULL; + + vp = XNTOV(xp); + vn_reinit(vp); + vn_setops(vp, xmem_vnodeops); + vp->v_vfsp = xm->xm_vfsp; + vp->v_type = vap->va_type; + vp->v_rdev = vap->va_rdev; + vp->v_data = (caddr_t)xp; + + mutex_enter(&xm->xm_contents); + /* + * Increment the pseudo generation number for this xmemnode. + * Since xmemnodes are allocated and freed, there really is no + * particular generation number for a new xmemnode. Just fake it + * by using a counter in each file system. + */ + xp->xn_gen = xm->xm_gen++; + + /* + * Add new xmemnode to end of linked list of xmemnodes for this xmemfs + * Root directory is handled specially in xmem_mount. 
+ */ + if (xm->xm_rootnode != (struct xmemnode *)NULL) { + xp->xn_forw = NULL; + xp->xn_back = xm->xm_rootnode->xn_back; + xp->xn_back->xn_forw = xm->xm_rootnode->xn_back = xp; + } + mutex_exit(&xm->xm_contents); +} + +/* + * + */ +int +xmem_fillpages(struct xmemnode *xp, struct vnode *vp, offset_t off, + offset_t len, int zerofill) +{ + uint_t blockno, endblock; + caddr_t base; + int error = 0; + struct xmount *xm = (struct xmount *)VTOXM(vp); + offset_t poff; + size_t bsize = xm->xm_bsize; + + blockno = off >> xm->xm_bshift; + poff = (offset_t)blockno << xm->xm_bshift; + endblock = howmany(off + len, (offset_t)bsize); + + if (endblock > xp->xn_ppasz) + return (EINVAL); + + /* Create missing pages if any */ + for (; blockno < endblock; ) { + if (!xp->xn_ppa[blockno]) { + xp->xn_ppa[blockno] = xmem_acquire_pages(xm, vp, poff); + if (!xp->xn_ppa[blockno]) + return (ENOSPC); + if (zerofill) { + page_t **ppp; + if (xm->xm_ppb == 1) + ppp = (page_t **)&xp->xn_ppa[blockno]; + else + ppp = xp->xn_ppa[blockno]; + + base = segxmem_getmap(xm->xm_map, vp, poff, + bsize, ppp, S_WRITE); + (void) kzero(base, bsize); + segxmem_release(xm->xm_map, base, bsize); + } + xp->xn_nblocks++; + } + blockno++; + poff += bsize; + } + return (error); +} + +/* + * xmemnode_trunc - set length of xmemnode and deal with resources + */ +int +xmemnode_trunc(struct xmount *xm, struct xmemnode *xp, u_offset_t newsize) +{ + u_offset_t oldsize = xp->xn_size; + timestruc_t now; + int error = 0; + size_t zlen; + ulong_t newblocks, oldblocks; + + ASSERT(RW_WRITE_HELD(&xp->xn_rwlock)); + ASSERT(RW_WRITE_HELD(&xp->xn_contents)); + + if (newsize == oldsize) { + /* Required by POSIX */ + goto stamp_out; + } + + switch (xp->xn_type) { + case VREG: + + oldblocks = howmany(oldsize, xm->xm_bsize); + newblocks = howmany(newsize, xm->xm_bsize); + + XMEMPRINTF(4, ("xmemnode_trunc: xp %p old %lx new %lx\n", + xp, oldblocks, newblocks)); + /* + * xn_ppasz is the size of the ppa array which may not + * be fully 
populated if pages cannot be allocated. + */ + ASSERT(xp->xn_ppasz >= oldblocks); + + /* Growing the file */ + if (newblocks > oldblocks) { + if (xp->xn_ppasz < newblocks) { + page_t ***ppa; + ppa = kmem_zalloc(newblocks * sizeof (*ppa), KM_SLEEP); + if (xp->xn_ppasz) { + bcopy(xp->xn_ppa, ppa, + newblocks * sizeof (*ppa)); + + kmem_free(xp->xn_ppa, + xp->xn_ppasz * sizeof (*ppa)); + } + xp->xn_ppa = ppa; + xp->xn_ppasz = newblocks; + } + } + + /* Free pages if shrinking file over block boundary. */ + if (newblocks < oldblocks) { + uint_t next; + page_t ***ppa = NULL; + next = newblocks; + if (next) { + ppa = kmem_zalloc(next * sizeof (*ppa), + KM_SLEEP); + bcopy(xp->xn_ppa, ppa, next * sizeof (*ppa)); + } + for (; next < oldblocks; next++) { + if (!xp->xn_ppa[next]) + continue; + xmem_release_pages(xm, xp->xn_ppa[next]); + xp->xn_nblocks--; + } + kmem_free(xp->xn_ppa, xp->xn_ppasz * sizeof (*ppa)); + xp->xn_ppa = ppa; + xp->xn_ppasz = newblocks; + } + + /* + * Update the file size now to reflect the pages we just + * blew away as we're about to drop the + * contents lock to zero the partial page (which could + * re-enter xmemfs via getpage and try to reacquire the lock) + * Once we drop the lock, faulters can fill in holes in + * the file and if we haven't updated the size they + * may fill in holes that are beyond EOF, which will then + * never get cleared. + */ + xp->xn_size = newsize; + + + if (newsize) { + /* Zero new size of file to page boundary. */ + zlen = PAGESIZE - ((ulong_t)newsize & PAGEOFFSET); + rw_exit(&xp->xn_contents); + pvn_vpzero(XNTOV(xp), (u_offset_t)newsize, zlen); + rw_enter(&xp->xn_contents, RW_WRITER); + } + + break; + + case VLNK: + /* + * Don't do anything here + * xmem_inactive frees the memory + */ + if (newsize != 0) + error = EINVAL; + goto out; + case VDIR: + /* + * Remove all the directory entries under this directory. 
+ */ + if (newsize != 0) { + error = EINVAL; + goto out; + } + xdirtrunc(xp); + ASSERT(xp->xn_nlink == 0); + break; + default: + goto out; + } + +stamp_out: + gethrestime(&now); + xp->xn_mtime = now; + xp->xn_ctime = now; +out: + /* + * xmemnode_trunc() cannot fail when newsize == 0. + */ + ASSERT(error == 0 || newsize != 0); + return (error); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deleted_files/usr/src/uts/intel/fs/xmemfs/xmem_vfsops.c Thu Jan 18 16:23:02 2007 -0800 @@ -0,0 +1,810 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/sysmacros.h> +#include <sys/kmem.h> +#include <sys/time.h> +#include <sys/pathname.h> +#include <sys/vfs.h> +#include <sys/vnode.h> +#include <sys/stat.h> +#include <sys/uio.h> +#include <sys/stat.h> +#include <sys/errno.h> +#include <sys/cmn_err.h> +#include <sys/cred.h> +#include <sys/statvfs.h> +#include <sys/mount.h> +#include <sys/mntent.h> +#include <sys/debug.h> +#include <sys/systm.h> +#include <sys/vmsystm.h> +#include <sys/bitmap.h> +#include <fs/fs_subr.h> +#include <vm/page.h> +#include <sys/model.h> +#include <sys/map.h> +#include <vm/seg_kmem.h> +#include <sys/cpuvar.h> +#include <sys/policy.h> + +#include <sys/fs/swapnode.h> +#include <sys/fs/xmem.h> + +#ifndef min +#define min(a, b) ((a) < (b) ? 
(a) : (b)) +#endif + +/* + * xmemfs vfs operations. + */ +static int xmemfsinit(int, char *); +static int xmem_mount(struct vfs *, struct vnode *, + struct mounta *, struct cred *); +static int xmem_unmount(struct vfs *, int, struct cred *); +static int xmem_root(struct vfs *, struct vnode **); +static int xmem_statvfs(struct vfs *, struct statvfs64 *); +static int xmem_vget(struct vfs *, struct vnode **, struct fid *); + +/* + * Loadable module wrapper + */ +#include <sys/modctl.h> + +static vfsdef_t vfw = { + VFSDEF_VERSION, + "xmemfs", + xmemfsinit, + 0, + NULL +}; + +/* + * Module linkage information + */ +static struct modlfs modlfs = { + &mod_fsops, "filesystem for xmemfs", &vfw +}; + +static struct modlinkage modlinkage = { + MODREV_1, &modlfs, NULL +}; + +pgcnt_t xmemfs_minfree; + +int +_init() +{ + return (mod_install(&modlinkage)); +} + +int +_fini() +{ + return (mod_remove(&modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +static int xmemfsfstype; +static major_t xmemfs_major; +static minor_t xmemfs_minor; +static kmutex_t xmemfs_minor_lock; + + +/* + * initialize global xmemfs locks and such + * called when loading xmemfs module + */ +static int +xmemfsinit(int fstype, char *name) +{ + static const fs_operation_def_t xmem_vfsops[] = { + VFSNAME_MOUNT, xmem_mount, + VFSNAME_UNMOUNT, xmem_unmount, + VFSNAME_ROOT, xmem_root, + VFSNAME_STATVFS, xmem_statvfs, + VFSNAME_VGET, xmem_vget, + NULL, NULL + }; + int error; + extern void xmemfs_hash_init(); + + error = vfs_setfsops(fstype, xmem_vfsops, NULL); + if (error != 0) { + cmn_err(CE_WARN, "xmemfsinit: bad vfs ops template"); + return (error); + } + + error = vn_make_ops(name, xmem_vnodeops_template, &xmem_vnodeops); + if (error != 0) { + (void) vfs_freevfsops_by_type(fstype); + cmn_err(CE_WARN, "xmemfsinit: bad vnode ops template"); + return (error); + } + + xmemfs_hash_init(); + xmemfsfstype = fstype; + ASSERT(xmemfsfstype != 0); + + if 
((xmemfs_major = getudev()) == (major_t)-1) { + cmn_err(CE_WARN, "xmemfsinit: Can't get unique device number."); + xmemfs_major = 0; + } + mutex_init(&xmemfs_minor_lock, NULL, MUTEX_DEFAULT, NULL); + + return (0); +} + + +/* + * xpg is an array of page_t * if xm_ppb > 1. + * xpg is a page_t * if xm_ppb == 1 + */ +void +xpgput(struct xmount *xm, void *xpg) +{ + ASSERT(xm->xm_xpgcnt < xm->xm_max); + xm->xm_xpgarray[xm->xm_xpgcnt++] = xpg; +} + +void * +xpgget(struct xmount *xm) +{ + if (!xm->xm_xpgcnt) + return (NULL); + + return (xm->xm_xpgarray[--xm->xm_xpgcnt]); +} + +void +xpginit(struct xmount *xm) +{ + xm->xm_xpgcnt = 0; + xm->xm_xpgarray = kmem_zalloc(sizeof (void *) * xm->xm_max, KM_SLEEP); +} + +void +xpgtrunc(struct xmount *xm, size_t newsz) +{ + void *old = xm->xm_xpgarray; + + ASSERT(newsz == xm->xm_xpgcnt); + if (newsz) { + xm->xm_xpgarray = + kmem_alloc(sizeof (void *) * newsz, KM_SLEEP); + bcopy(old, xm->xm_xpgarray, sizeof (void *) * newsz); + } + kmem_free(old, sizeof (void *) * xm->xm_max); +} + +void +xpgdeinit(struct xmount *xm) +{ + xm->xm_xpgcnt = 0; + if (xm->xm_max) + kmem_free(xm->xm_xpgarray, sizeof (void *) * xm->xm_max); + xm->xm_xpgarray = NULL; +} + + +struct xmount *xmountp; /* ### DEBUG */ + +#define XFREE(xm, xp) \ + vn_free(xp->xn_vnode); \ + xmem_memfree(xp, sizeof (struct xmemnode)); \ + rmfreemap(xm->xm_map); \ + xmem_memfree(xm->xm_mntpath, strlen(xm->xm_mntpath) + 1); \ + xpgdeinit(xm); \ + xmem_memfree(xm, sizeof (struct xmount)); + + +static int +xmem_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap, + struct cred *cr) +{ + struct xmount *xm; + struct xmemnode *xp; + struct pathname dpn; + char *data = uap->dataptr; + int datalen = uap->datalen; + int error; + struct xmemfs_args xargs; + struct vattr rattr; + int got_attrs, num_pagesizes; + uint_t blocks_left; + size_t frag; + + XMEMPRINTF(1, ("xmem_mount: vfs %p mvp %p uap %p cr %p\n", + (void *)vfsp, (void *)mvp, (void *)uap, (void *)cr)); + + if ((error = 
secpolicy_fs_mount(cr, mvp, vfsp)) != 0) + return (error); + + if (mvp->v_type != VDIR) + return (ENOTDIR); + + /* + * Force non-executable files by setting the "noexec" option + * which will be interpreted by the VFS layer. + */ + vfs_setmntopt(vfsp, MNTOPT_NOEXEC, NULL, 0); + + mutex_enter(&mvp->v_lock); + if ((uap->flags & MS_OVERLAY) == 0 && + (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { + mutex_exit(&mvp->v_lock); + return (EBUSY); + } + mutex_exit(&mvp->v_lock); + + /* + * Get arguments + */ + if (datalen != 0) { + if (datalen != sizeof (xargs)) + return (EINVAL); + else { + if (copyin(data, &xargs, sizeof (xargs))) + return (EFAULT); + } + if (xargs.xa_bsize == 0) + xargs.xa_bsize = PAGESIZE; + } else { + xargs.xa_bsize = PAGESIZE; + xargs.xa_flags = 0; + xargs.xa_fssize = 0; + } + + XMEMPRINTF(1, ("xmem_mount: xa bsize %llx fssize %llx flags %x\n", + xargs.xa_bsize, xargs.xa_fssize, xargs.xa_flags)); + + num_pagesizes = page_num_pagesizes(); + + if (xargs.xa_flags & XARGS_LARGEPAGES) + xargs.xa_bsize = page_get_pagesize(num_pagesizes - 1); + + /* Make sure xa_bsize is a pure power of two */ + if (!IS_P2ALIGNED(xargs.xa_bsize, xargs.xa_bsize - 1)) { + cmn_err(CE_WARN, "xmemfs: invalid blocksize %x", + (int)xargs.xa_bsize); + xargs.xa_bsize = PAGESIZE; + } + + while (--num_pagesizes >= 0) + if (xargs.xa_bsize == page_get_pagesize(num_pagesizes)) + break; + + if (num_pagesizes < 0) { + cmn_err(CE_WARN, + "xmemfs: blocksize %lld not a natural pagesize", + xargs.xa_bsize); + xargs.xa_bsize = PAGESIZE; + } + + if (error = pn_get(uap->dir, UIO_USERSPACE, &dpn)) + return (error); + + xm = xmem_memalloc(sizeof (struct xmount), 1); + + xmountp = xm; + + XMEMPRINTF(4, ("xmem_mount: xm %p\n", (void *)xm)); + + xm->xm_mntpath = xmem_memalloc(dpn.pn_pathlen + 1, 1); + (void) strcpy(xm->xm_mntpath, dpn.pn_path); + pn_free(&dpn); + + xm->xm_vmmapsize = xm->xm_mapsize = + xargs.xa_bsize * SEGXMEM_NUM_SIMULMAPS; + + /* need to allocate more to ensure alignment if 
largepage */ + + if (xargs.xa_bsize != PAGESIZE) + xm->xm_vmmapsize += xargs.xa_bsize; + + /* Set block size & max memory allowed for the file system */ + xm->xm_bsize = (size_t)xargs.xa_bsize; + xm->xm_bshift = highbit(xargs.xa_bsize) - 1; + + /* + * 5 * lotsfree satisfies XMEMMINFREE for 4 GB of memory and above. + */ + xmemfs_minfree = min(5 * lotsfree, XMEMMINFREE/PAGESIZE); + + if (xargs.xa_fssize) { + + pgcnt_t fspgcnt; + + xargs.xa_fssize = roundup(xargs.xa_fssize, xm->xm_bsize); + + fspgcnt = xargs.xa_fssize >> PAGESHIFT; + + /* sanity check this against freemem */ + if (fspgcnt + xmemfs_minfree > freemem) { + xmem_memfree(xm->xm_mntpath, + strlen(xm->xm_mntpath) + 1); + xmem_memfree(xm, sizeof (struct xmount)); + return (EFBIG); + } + xm->xm_max = xargs.xa_fssize >> xm->xm_bshift; + } else { + /* + * fssize is mandatory - should not be here but if + * fssize == 0 is allowed, grab all of free memory + * minus xmemfs_minfree. + */ + + if (freemem < xmemfs_minfree) + xm->xm_max = 0; + else + xm->xm_max = freemem - xmemfs_minfree; + + xm->xm_max >>= xm->xm_bshift - PAGESHIFT; + } + + xm->xm_ppb = btop(xm->xm_bsize); /* pages per block */ + + + XMEMPRINTF(1, ("xmem_mount: xm_max %lx xm_bsize %lx\n", + xm->xm_max, xm->xm_bsize)); + + /* + * Allocate a map to provide an address for each page in + * (xargs.xa_bsize * 4) and free all of them. 
+ */ + xm->xm_map = rmallocmap_wait(xm->xm_mapsize / PAGESIZE); + + xpginit(xm); + + xp = xmem_memalloc(sizeof (struct xmemnode), 1); + xp->xn_vnode = vn_alloc(KM_SLEEP); + + /* + * do not SLEEP waiting for memory resources after vmem_alloc + */ + + xm->xm_vmmapaddr = xm->xm_mapaddr = + vmem_alloc(heap_arena, xm->xm_vmmapsize, VM_NOSLEEP); + + if (!xm->xm_mapaddr) { + XFREE(xm, xp); + return (ENOMEM); + } + + if ((frag = ((uintptr_t)xm->xm_mapaddr & + ((uintptr_t)xargs.xa_bsize - 1))) != 0) + xm->xm_mapaddr += (xargs.xa_bsize - frag); + + rmfree(xm->xm_map, xm->xm_mapsize, (ulong_t)xm->xm_mapaddr); + + if (xargs.xa_flags & XARGS_RESERVEMEM) { + struct seg tmpseg; + + /* grab all memory now */ + blocks_left = xm->xm_max; + bzero(&tmpseg, sizeof (struct seg)); + tmpseg.s_as = &kas; + + if (page_resv(xm->xm_max * xm->xm_ppb, KM_NOSLEEP) == 0) { + vmem_free(heap_arena, xm->xm_vmmapaddr, + xm->xm_vmmapsize); + XFREE(xm, xp); + return (ENOMEM); + } + + while (blocks_left) { + page_t *pp, *pplist; + page_t **ppa; + int i; + + /* + * optimise for ppb == 1 - let xp_ppa point directly + * to page. 
+ */ + + if (xm->xm_ppb > 1) { + ppa = kmem_alloc(sizeof (page_t *) * xm->xm_ppb, + KM_NOSLEEP); + + if (!ppa) { + xpgtrunc(xm, xm->xm_max - blocks_left); + xm->xm_max -= blocks_left; + page_unresv(blocks_left * xm->xm_ppb); + if (xargs.xa_fssize) + cmn_err(CE_WARN, + "could only reserve %d blocks " + "for xmemfs", (int)xm->xm_max); + break; + } + } + + (void) page_create_wait(xm->xm_ppb, PG_WAIT); + pplist = page_get_freelist(NULL, 0, &tmpseg, NULL, + xm->xm_bsize, 0, NULL); + + if (pplist == NULL && xm->xm_ppb == 1) { + pplist = page_get_cachelist(NULL, 0, &tmpseg, + NULL, 0, NULL); + } + + if (pplist == NULL) { + page_create_putback(xm->xm_ppb); + if (xm->xm_ppb > 1) + kmem_free(ppa, sizeof (page_t *) * + xm->xm_ppb); + xpgtrunc(xm, xm->xm_max - blocks_left); + xm->xm_max -= blocks_left; + page_unresv(blocks_left * xm->xm_ppb); + if (xargs.xa_fssize) + cmn_err(CE_WARN, + "could only reserve %d blocks " + "for xmemfs", (int)xm->xm_max); + break; + } + + if (PP_ISAGED(pplist) == 0) { + ASSERT(xm->xm_ppb == 1); + page_hashout(pplist, NULL); + } + + for (i = 0; i < xm->xm_ppb; i++) { + pp = pplist; + page_sub(&pplist, pp); + ASSERT(PAGE_EXCL(pp)); + ASSERT(pp->p_vnode == NULL); + ASSERT(!hat_page_is_mapped(pp)); + PP_CLRFREE(pp); + PP_CLRAGED(pp); + if (xm->xm_ppb == 1) + ppa = (page_t **)pp; + else + ppa[i] = pp; + } + + xpgput(xm, ppa); + blocks_left--; + } + if (!xm->xm_xpgcnt) { + /* No pages at all */ + page_unresv(xm->xm_max * xm->xm_ppb); + vmem_free(heap_arena, xm->xm_vmmapaddr, + xm->xm_vmmapsize); + XFREE(xm, xp); + return (ENOMEM); + } + xm->xm_flags |= XARGS_RESERVEMEM; + } + xm->xm_bsize = (size_t)xargs.xa_bsize; + + /* + * find an available minor device number for this mount + */ + mutex_enter(&xmemfs_minor_lock); + do { + xmemfs_minor = (xmemfs_minor + 1) & L_MAXMIN32; + xm->xm_dev = makedevice(xmemfs_major, xmemfs_minor); + } while (vfs_devismounted(xm->xm_dev)); + mutex_exit(&xmemfs_minor_lock); + + /* + * Set but don't bother entering the mutex + * 
(xmount not on mount list yet) + */ + mutex_init(&xm->xm_contents, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&xm->xm_renamelck, NULL, MUTEX_DEFAULT, NULL); + + xm->xm_vfsp = vfsp; + + vfsp->vfs_data = (caddr_t)xm; + vfsp->vfs_fstype = xmemfsfstype; + vfsp->vfs_dev = xm->xm_dev; + vfsp->vfs_bsize = xm->xm_bsize; + vfsp->vfs_flag |= VFS_NOTRUNC; + vfs_make_fsid(&vfsp->vfs_fsid, xm->xm_dev, xmemfsfstype); + + /* + * allocate and initialize root xmemnode structure + */ + bzero(&rattr, sizeof (struct vattr)); + rattr.va_mode = (mode_t)(S_IFDIR | 0777); + rattr.va_type = VDIR; + rattr.va_rdev = 0; + xmemnode_init(xm, xp, &rattr, cr); + + /* + * Get the mode, uid, and gid from the underlying mount point. + */ + rattr.va_mask = AT_MODE|AT_UID|AT_GID; /* Hint to getattr */ + got_attrs = VOP_GETATTR(mvp, &rattr, 0, cr); + + rw_enter(&xp->xn_rwlock, RW_WRITER); + XNTOV(xp)->v_flag |= VROOT; + + /* + * If the getattr succeeded, use its results. Otherwise allow + * the previously set hardwired defaults to prevail. + */ + if (got_attrs == 0) { + xp->xn_mode = rattr.va_mode; + xp->xn_uid = rattr.va_uid; + xp->xn_gid = rattr.va_gid; + } + + /* + * initialize linked list of xmemnodes so that the back pointer of + * the root xmemnode always points to the last one on the list + * and the forward pointer of the last node is null. + */ + xp->xn_back = xp; + xp->xn_forw = NULL; + xp->xn_nlink = 0; + xm->xm_rootnode = xp; + + xdirinit(xp, xp); + + rw_exit(&xp->xn_rwlock); + + return (0); +} + +static int +xmem_unmount(struct vfs *vfsp, int flag, struct cred *cr) +{ + struct xmount *xm = (struct xmount *)VFSTOXM(vfsp); + struct xmemnode *xp; + + if (secpolicy_fs_unmount(cr, vfsp) != 0) + return (EPERM); + /* + * forced unmount is not supported by this file system + * and thus, ENOTSUP, is being returned. + */ + if (flag & MS_FORCE) + return (ENOTSUP); + + mutex_enter(&xm->xm_contents); + + /* + * Don't close down the xmemfs if there are open files. 
+ * There should be only one file referenced (the rootnode) + * and only one reference to the vnode for that file. + */ + xp = xm->xm_rootnode; + if (XNTOV(xp)->v_count > 1) { + mutex_exit(&xm->xm_contents); + return (EBUSY); + } + + for (xp = xp->xn_forw; xp; xp = xp->xn_forw) { + if (XNTOV(xp)->v_count > 0) { + mutex_exit(&xm->xm_contents); + return (EBUSY); + } + } + + /* + * We can drop the mutex now because no one can find this mount + */ + mutex_exit(&xm->xm_contents); + + /* + * Free all kmemalloc'd and non-anonalloc'd memory associated with + * this filesystem. To do this, we go through the file list twice, + * once to remove all the directory entries, and then to remove + * all the files. We do this because there is useful code in + * xmemnode_free which assumes that the directory entry has been + * removed before the file. + */ + /* + * Remove all directory entries + */ + for (xp = xm->xm_rootnode; xp; xp = xp->xn_forw) { + rw_enter(&xp->xn_rwlock, RW_WRITER); + if (xp->xn_type == VDIR) + xdirtrunc(xp); + rw_exit(&xp->xn_rwlock); + } + + ASSERT(xm->xm_rootnode); + + /* + * We re-acquire the lock to prevent others who have a HOLD on + * a xmemnode via its pages from blowing it away + * (in xmem_inactive) while we're trying to get to it here. Once + * we have a HOLD on it we know it'll stick around. + */ + mutex_enter(&xm->xm_contents); + /* + * Remove all the files (except the rootnode) backwards. + */ + while ((xp = xm->xm_rootnode->xn_back) != xm->xm_rootnode) { + /* + * Blow the xmemnode away by HOLDing it and RELE'ing it. + * The RELE calls inactive and blows it away because there + * we have the last HOLD. + */ + VN_HOLD(XNTOV(xp)); + mutex_exit(&xm->xm_contents); + VN_RELE(XNTOV(xp)); + mutex_enter(&xm->xm_contents); + /* + * It's still there after the RELE. Someone else like pageout + * has a hold on it so wait a bit and then try again - we know + * they'll give it up soon. 
+ */ + if (xp == xm->xm_rootnode->xn_back) { + mutex_exit(&xm->xm_contents); + delay(hz / 4); + mutex_enter(&xm->xm_contents); + } + } + if (xm->xm_flags & XARGS_RESERVEMEM) { + page_t **ppa; + uint_t pindex; + + while ((ppa = xpgget(xm)) != NULL) { + if (xm->xm_ppb == 1) { + /*LINTED*/ + VN_DISPOSE((page_t *)ppa, B_FREE, 0, kcred); + continue; + } + /* free each page */ + for (pindex = 0; pindex < xm->xm_ppb; pindex++) { + ASSERT(ppa[pindex]->p_szc); + ppa[pindex]->p_szc = 0; + /*LINTED*/ + VN_DISPOSE(ppa[pindex], B_FREE, 0, kcred); + } + kmem_free(ppa, sizeof (*ppa) * xm->xm_ppb); + } + xpgdeinit(xm); + page_unresv(xm->xm_max * xm->xm_ppb); + } + mutex_exit(&xm->xm_contents); + + VN_RELE(XNTOV(xm->xm_rootnode)); + + ASSERT(xm->xm_mntpath); + + xmem_memfree(xm->xm_mntpath, strlen(xm->xm_mntpath) + 1); + + mutex_destroy(&xm->xm_contents); + mutex_destroy(&xm->xm_renamelck); + vmem_free(heap_arena, xm->xm_vmmapaddr, xm->xm_vmmapsize); + rmfreemap(xm->xm_map); + xmem_memfree(xm, sizeof (struct xmount)); + + return (0); +} + +/* + * return root xmemnode for given vnode + */ +static int +xmem_root(struct vfs *vfsp, struct vnode **vpp) +{ + struct xmount *xm = (struct xmount *)VFSTOXM(vfsp); + struct xmemnode *xp = xm->xm_rootnode; + struct vnode *vp; + + ASSERT(xp); + + vp = XNTOV(xp); + VN_HOLD(vp); + *vpp = vp; + return (0); +} + +static int +xmem_statvfs(struct vfs *vfsp, struct statvfs64 *sbp) +{ + struct xmount *xm = (struct xmount *)VFSTOXM(vfsp); + long blocks; + dev32_t d32; + + sbp->f_bsize = xm->xm_bsize; + sbp->f_frsize = xm->xm_bsize; /* No fragmentation for now ? 
*/ + + /* + * Find the amount of available physical and memory swap + */ + if (xm->xm_flags & XARGS_RESERVEMEM) + blocks = xm->xm_max - xm->xm_mem; + else + blocks = MAX((long)(freemem - lotsfree - xmemfs_minfree), 0); + + sbp->f_bavail = sbp->f_bfree = (fsblkcnt64_t)blocks; + + /* + * Total number of blocks is what's available plus what's been used + */ + sbp->f_blocks = (fsblkcnt64_t)(sbp->f_bfree + xm->xm_mem); + + /* + * return a somewhat arbitrary number of inodes available + */ + sbp->f_favail = sbp->f_ffree = (fsfilcnt64_t)((xm->xm_max/1024)+1); + (void) cmpldev(&d32, vfsp->vfs_dev); + sbp->f_fsid = d32; + (void) strcpy(sbp->f_basetype, vfssw[xmemfsfstype].vsw_name); + (void) strcpy(sbp->f_fstr, xm->xm_mntpath); + sbp->f_flag = vf_to_stf(vfsp->vfs_flag); + sbp->f_namemax = MAXNAMELEN - 1; + return (0); +} + +static int +xmem_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp) +{ + register struct xfid *xfid; + register struct xmount *xm = (struct xmount *)VFSTOXM(vfsp); + register struct xmemnode *xp = NULL; + + xfid = (struct xfid *)fidp; + *vpp = NULL; + + mutex_enter(&xm->xm_contents); + for (xp = xm->xm_rootnode; xp; xp = xp->xn_forw) { + mutex_enter(&xp->xn_tlock); + if (xp->xn_nodeid == xfid->xfid_ino) { + /* + * If the gen numbers don't match we know the + * file won't be found since only one xmemnode + * can have this number at a time. + */ + if (xp->xn_gen != xfid->xfid_gen || xp->xn_nlink == 0) { + mutex_exit(&xp->xn_tlock); + mutex_exit(&xm->xm_contents); + return (0); + } + *vpp = (struct vnode *)XNTOV(xp); + + VN_HOLD(*vpp); + + if ((xp->xn_mode & S_ISVTX) && + !(xp->xn_mode & (S_IXUSR | S_IFDIR))) { + mutex_enter(&(*vpp)->v_lock); + (*vpp)->v_flag |= VISSWAP; + mutex_exit(&(*vpp)->v_lock); + } + mutex_exit(&xp->xn_tlock); + mutex_exit(&xm->xm_contents); + return (0); + } + mutex_exit(&xp->xn_tlock); + } + mutex_exit(&xm->xm_contents); + return (0); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deleted_files/usr/src/uts/intel/fs/xmemfs/xmem_vnops.c Thu Jan 18 16:23:02 2007 -0800 @@ -0,0 +1,1736 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/t_lock.h> +#include <sys/systm.h> +#include <sys/sysmacros.h> +#include <sys/user.h> +#include <sys/time.h> +#include <sys/vfs.h> +#include <sys/vnode.h> +#include <sys/file.h> +#include <sys/fcntl.h> +#include <sys/flock.h> +#include <sys/kmem.h> +#include <sys/uio.h> +#include <sys/errno.h> +#include <sys/stat.h> +#include <sys/cred.h> +#include <sys/dirent.h> +#include <sys/pathname.h> +#include <sys/vmsystm.h> +#include <sys/map.h> +#include <sys/fs/xmem.h> +#include <sys/mman.h> +#include <vm/hat.h> +#include <vm/seg.h> +#include <vm/as.h> +#include <vm/page.h> +#include <vm/pvn.h> +#include <sys/cmn_err.h> +#include <sys/debug.h> +#include <sys/swap.h> +#include <sys/buf.h> +#include <sys/vm.h> +#include <sys/vtrace.h> +#include <sys/policy.h> +#include <fs/fs_subr.h> + +static int xmem_getapage(struct vnode *, u_offset_t, size_t, uint_t *, + page_t **, size_t, struct seg *, caddr_t, enum seg_rw, struct cred *); + +#ifndef lint +static int xmem_putapage(struct vnode *, page_t *, u_offset_t *, size_t *, + int, struct cred *); +#endif + + +/* ARGSUSED1 */ +static int +xmem_open(struct vnode **vpp, int flag, struct cred *cred) +{ + /* + * swapon to a xmemfs file is not supported so access + * is denied on open if VISSWAP is set. + */ + if ((*vpp)->v_flag & VISSWAP) + return (EINVAL); + return (0); +} + +/* ARGSUSED1 */ +static int +xmem_close(struct vnode *vp, int flag, int count, offset_t offset, + struct cred *cred) +{ + cleanlocks(vp, ttoproc(curthread)->p_pid, 0); + cleanshares(vp, ttoproc(curthread)->p_pid); + return (0); +} + + +/* + * wrxmem does the real work of write requests for xmemfs. 
+ */ +static int +wrxmem(struct xmount *xm, struct xmemnode *xp, struct uio *uio, + struct cred *cr, struct caller_context *ct) +{ + uint_t blockoffset; /* offset in the block */ + uint_t blkwr; /* offset in blocks into xmem file */ + uint_t blkcnt; + caddr_t base; + ssize_t bytes; /* bytes to uiomove */ + struct vnode *vp; + int error = 0; + size_t bsize = xm->xm_bsize; + rlim64_t limit = uio->uio_llimit; + long oresid = uio->uio_resid; + timestruc_t now; + offset_t offset; + + /* + * xp->xn_size is incremented before the uiomove + * is done on a write. If the move fails (bad user + * address) reset xp->xn_size. + * The better way would be to increment xp->xn_size + * only if the uiomove succeeds. + */ + long xn_size_changed = 0; + offset_t old_xn_size; + + vp = XNTOV(xp); + ASSERT(vp->v_type == VREG); + + XMEMPRINTF(1, ("wrxmem: vp %p resid %lx off %llx\n", + (void *)vp, uio->uio_resid, uio->uio_loffset)); + + ASSERT(RW_WRITE_HELD(&xp->xn_contents)); + ASSERT(RW_WRITE_HELD(&xp->xn_rwlock)); + + if (MANDLOCK(vp, xp->xn_mode)) { + rw_exit(&xp->xn_contents); + /* + * xmem_getattr ends up being called by chklock + */ + error = chklock(vp, FWRITE, + uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct); + + rw_enter(&xp->xn_contents, RW_WRITER); + if (error != 0) { + XMEMPRINTF(8, ("wrxmem: vp %p error %x\n", + (void *)vp, error)); + return (error); + } + } + + if ((offset = uio->uio_loffset) < 0) + return (EINVAL); + + if (offset >= limit) { + proc_t *p = ttoproc(curthread); + + mutex_enter(&p->p_lock); + (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls, + p, RCA_UNSAFE_SIGINFO); + mutex_exit(&p->p_lock); + return (EFBIG); + } + + if (uio->uio_resid == 0) { + XMEMPRINTF(8, ("wrxmem: vp %p resid %lx\n", + (void *)vp, uio->uio_resid)); + return (0); + } + + /* + * Get the highest blocknumber and allocate page array if needed. + * Note that if xm_bsize != PAGESIZE, each ppa[] is pointer to + * a page array rather than just a page. 
+ */ + blkcnt = howmany((offset + uio->uio_resid), bsize); + blkwr = offset >> xm->xm_bshift; /* write begins here */ + + XMEMPRINTF(1, ("wrxmem: vp %p blkcnt %x blkwr %x xn_ppasz %lx\n", + (void *)vp, blkcnt, blkwr, xp->xn_ppasz)); + + /* file size increase */ + if (xp->xn_ppasz < blkcnt) { + + page_t ***ppa; + int ppasz; + uint_t blksinfile = howmany(xp->xn_size, bsize); + + /* + * check if sufficient blocks available for the given offset. + */ + if (blkcnt - blksinfile > xm->xm_max - xm->xm_mem) + return (ENOSPC); + + /* + * to prevent reallocating every time the file grows by a + * single block, double the size of the array. + */ + if (blkcnt < xp->xn_ppasz * 2) + ppasz = xp->xn_ppasz * 2; + else + ppasz = blkcnt; + + + ppa = kmem_zalloc(ppasz * sizeof (page_t **), KM_SLEEP); + + ASSERT(ppa); + + if (xp->xn_ppasz) { + bcopy(xp->xn_ppa, ppa, blksinfile * sizeof (*ppa)); + kmem_free(xp->xn_ppa, xp->xn_ppasz * sizeof (*ppa)); + } + xp->xn_ppa = ppa; + xp->xn_ppasz = ppasz; + + /* + * fill in the 'hole' if write offset beyond file size. This + * helps in creating large files quickly; an application can + * lseek to a large offset and perform a single write + * operation to create the large file. + */ + + if (blksinfile < blkwr) { + + old_xn_size = xp->xn_size; + xp->xn_size = (offset_t)blkwr * bsize; + + XMEMPRINTF(4, ("wrxmem: fill vp %p blks %x to %x\n", + (void *)vp, blksinfile, blkcnt - 1)); + error = xmem_fillpages(xp, vp, + (offset_t)blksinfile * bsize, + (offset_t)(blkcnt - blksinfile) * bsize, 1); + if (error) { + /* truncate file back to original size */ + (void) xmemnode_trunc(xm, xp, old_xn_size); + return (error); + } + /* + * if error on blkwr, this allows truncation of the + * filled hole. 
+ */ + xp->xn_size = old_xn_size; + } + } + + do { + offset_t pagestart, pageend; + page_t **ppp; + + blockoffset = (uint_t)offset & (bsize - 1); + /* + * A maximum of xm->xm_bsize bytes of data is transferred + * each pass through this loop + */ + bytes = MIN(bsize - blockoffset, uio->uio_resid); + + ASSERT(bytes); + + if (offset + bytes >= limit) { + if (offset >= limit) { + error = EFBIG; + goto out; + } + bytes = limit - offset; + } + + + if (!xp->xn_ppa[blkwr]) { + /* zero fill new pages - simplify partial updates */ + error = xmem_fillpages(xp, vp, offset, bytes, 1); + if (error) + return (error); + } + + /* grow the file to the new length */ + if (offset + bytes > xp->xn_size) { + xn_size_changed = 1; + old_xn_size = xp->xn_size; + xp->xn_size = offset + bytes; + } + +#ifdef LOCKNEST + xmem_getpage(); +#endif + + /* xn_ppa[] is a page_t * if ppb == 1 */ + if (xm->xm_ppb == 1) + ppp = (page_t **)&xp->xn_ppa[blkwr]; + else + ppp = &xp->xn_ppa[blkwr][btop(blockoffset)]; + + pagestart = offset & ~(offset_t)(PAGESIZE - 1); + /* + * subtract 1 in case (offset + bytes) is mod PAGESIZE + * so that pageend is the actual index of last page. + */ + pageend = (offset + bytes - 1) & ~(offset_t)(PAGESIZE - 1); + + base = segxmem_getmap(xm->xm_map, vp, + pagestart, pageend - pagestart + PAGESIZE, + ppp, S_WRITE); + + rw_exit(&xp->xn_contents); + + error = uiomove(base + (offset - pagestart), bytes, + UIO_WRITE, uio); + segxmem_release(xm->xm_map, base, + pageend - pagestart + PAGESIZE); + + /* + * Re-acquire contents lock. + */ + rw_enter(&xp->xn_contents, RW_WRITER); + /* + * If the uiomove failed, fix up xn_size. + */ + if (error) { + if (xn_size_changed) { + /* + * The uiomove failed, and we + * allocated blocks,so get rid + * of them. 
+ */ + (void) xmemnode_trunc(xm, xp, old_xn_size); + } + } else { + if ((xp->xn_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) && + (xp->xn_mode & (S_ISUID | S_ISGID)) && + secpolicy_vnode_setid_retain(cr, + (xp->xn_mode & S_ISUID) != 0 && xp->xn_uid == 0) + != 0) { + + /* + * Clear Set-UID & Set-GID bits on + * successful write if not privileged + * and at least one of the execute bits + * is set. If we always clear Set-GID, + * mandatory file and record locking is + * unuseable. + */ + xp->xn_mode &= ~(S_ISUID | S_ISGID); + } + gethrestime(&now); + xp->xn_mtime = now; + xp->xn_ctime = now; + } + offset = uio->uio_loffset; /* uiomove sets uio_loffset */ + blkwr++; + } while (error == 0 && uio->uio_resid > 0 && bytes != 0); + +out: + /* + * If we've already done a partial-write, terminate + * the write but return no error. + */ + if (oresid != uio->uio_resid) + error = 0; + return (error); +} + +/* + * rdxmem does the real work of read requests for xmemfs. + */ +static int +rdxmem( + struct xmount *xm, + struct xmemnode *xp, + struct uio *uio, + struct caller_context *ct) +{ + ulong_t blockoffset; /* offset in xmemfs file (uio_offset) */ + caddr_t base; + ssize_t bytes; /* bytes to uiomove */ + struct vnode *vp; + int error; + uint_t blocknumber; + long oresid = uio->uio_resid; + size_t bsize = xm->xm_bsize; + offset_t offset; + + vp = XNTOV(xp); + + XMEMPRINTF(1, ("rdxmem: vp %p\n", (void *)vp)); + + ASSERT(RW_LOCK_HELD(&xp->xn_contents)); + + if (MANDLOCK(vp, xp->xn_mode)) { + rw_exit(&xp->xn_contents); + /* + * xmem_getattr ends up being called by chklock + */ + error = chklock(vp, FREAD, + uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct); + rw_enter(&xp->xn_contents, RW_READER); + if (error != 0) { + XMEMPRINTF(1, + ("rdxmem: vp %p error %x\n", (void *)vp, error)); + return (error); + } + } + ASSERT(xp->xn_type == VREG); + + if ((offset = uio->uio_loffset) >= MAXOFF_T) { + XMEMPRINTF(1, ("rdxmem: vp %p bad offset %llx\n", + (void *)vp, uio->uio_loffset)); + return 
(0); + } + if (offset < 0) + return (EINVAL); + + if (uio->uio_resid == 0) { + XMEMPRINTF(1, ("rdxmem: vp %p resid 0\n", (void *)vp)); + return (0); + } + + blocknumber = offset >> xm->xm_bshift; + do { + offset_t diff, pagestart, pageend; + uint_t pageinblock; + + blockoffset = offset & (bsize - 1); + /* + * A maximum of xm->xm_bsize bytes of data is transferred + * each pass through this loop + */ + bytes = MIN(bsize - blockoffset, uio->uio_resid); + + diff = xp->xn_size - offset; + + if (diff <= 0) { + error = 0; + goto out; + } + if (diff < bytes) + bytes = diff; + + if (!xp->xn_ppa[blocknumber]) + if (error = xmem_fillpages(xp, vp, offset, bytes, 1)) { + return (error); + } + /* + * We have to drop the contents lock to prevent the VM + * system from trying to reacquire it in xmem_getpage() + * should the uiomove cause a pagefault. + */ + rw_exit(&xp->xn_contents); + +#ifdef LOCKNEST + xmem_getpage(); +#endif + + /* 2/10 panic in hat_memload_array - len & MMU_OFFSET */ + + pagestart = offset & ~(offset_t)(PAGESIZE - 1); + pageend = (offset + bytes - 1) & ~(offset_t)(PAGESIZE - 1); + if (xm->xm_ppb == 1) + base = segxmem_getmap(xm->xm_map, vp, + pagestart, pageend - pagestart + PAGESIZE, + (page_t **)&xp->xn_ppa[blocknumber], S_READ); + else { + pageinblock = btop(blockoffset); + base = segxmem_getmap(xm->xm_map, vp, + pagestart, pageend - pagestart + PAGESIZE, + &xp->xn_ppa[blocknumber][pageinblock], S_READ); + + } + error = uiomove(base + (blockoffset & (PAGESIZE - 1)), + bytes, UIO_READ, uio); + + segxmem_release(xm->xm_map, base, + pageend - pagestart + PAGESIZE); + /* + * Re-acquire contents lock. + */ + rw_enter(&xp->xn_contents, RW_READER); + + offset = uio->uio_loffset; + blocknumber++; + } while (error == 0 && uio->uio_resid > 0); + +out: + gethrestime(&xp->xn_atime); + + /* + * If we've already done a partial read, terminate + * the read but return no error. 
+ */ + if (oresid != uio->uio_resid) + error = 0; + + return (error); +} + +/* ARGSUSED2 */ +static int +xmem_read(struct vnode *vp, struct uio *uiop, int ioflag, cred_t *cred, + struct caller_context *ct) +{ + struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); + struct xmount *xm = (struct xmount *)VTOXM(vp); + int error; + + /* + * We don't currently support reading non-regular files + */ + if (vp->v_type != VREG) + return (EINVAL); + /* + * xmem_rwlock should have already been called from layers above + */ + ASSERT(RW_READ_HELD(&xp->xn_rwlock)); + + rw_enter(&xp->xn_contents, RW_READER); + + error = rdxmem(xm, xp, uiop, ct); + + rw_exit(&xp->xn_contents); + + return (error); +} + +static int +xmem_write(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cred, + struct caller_context *ct) +{ + struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); + struct xmount *xm = (struct xmount *)VTOXM(vp); + int error; + + /* + * We don't currently support writing to non-regular files + */ + if (vp->v_type != VREG) + return (EINVAL); /* XXX EISDIR? */ + + /* + * xmem_rwlock should have already been called from layers above + */ + ASSERT(RW_WRITE_HELD(&xp->xn_rwlock)); + + rw_enter(&xp->xn_contents, RW_WRITER); + + if (ioflag & FAPPEND) { + /* + * In append mode start at end of file. 
+ */ + uiop->uio_loffset = xp->xn_size; + } + + error = wrxmem(xm, xp, uiop, cred, ct); + + rw_exit(&xp->xn_contents); + + return (error); +} + +/* ARGSUSED */ +static int +xmem_ioctl(struct vnode *vp, int com, intptr_t data, int flag, + struct cred *cred, int *rvalp) +{ + return (ENOTTY); +} + +/* ARGSUSED2 */ +static int +xmem_getattr(struct vnode *vp, struct vattr *vap, int flags, struct cred *cred) +{ + struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); + struct xmount *xm = (struct xmount *)VTOXM(vp); + + mutex_enter(&xp->xn_tlock); + + *vap = xp->xn_attr; + + vap->va_mode = xp->xn_mode & MODEMASK; + vap->va_type = vp->v_type; + vap->va_blksize = xm->xm_bsize; + vap->va_nblocks = (fsblkcnt64_t)btodb(ptob(btopr(vap->va_size))); + + mutex_exit(&xp->xn_tlock); + return (0); +} + +/*ARGSUSED*/ +static int +xmem_setattr(struct vnode *vp, struct vattr *vap, int flags, struct cred *cred, + caller_context_t *ct) +{ + struct xmount *xm = (struct xmount *)VTOXM(vp); + struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); + int error; + struct vattr *get; + register long int mask = vap->va_mask; + + /* + * Cannot set these attributes + */ + if (mask & AT_NOSET) + return (EINVAL); + + mutex_enter(&xp->xn_tlock); + + get = &xp->xn_attr; + + error = secpolicy_vnode_setattr(cred, vp, vap, get, flags, + xmem_xaccess, xp); + + if (error != 0) + goto out; + + mask = vap->va_mask; + + /* + * Change file access modes. 
+ */ + if (mask & AT_MODE) { + /* prevent execute permission to be set for regular files */ + if (S_ISREG(get->va_mode)) + vap->va_mode &= ~(S_IXUSR | S_IXGRP | S_IXOTH); + + XMEMPRINTF(1, ("xmem_setattr: va_mode old %x new %x\n", + get->va_mode, vap->va_mode)); + + get->va_mode &= S_IFMT; + get->va_mode |= vap->va_mode & ~S_IFMT; + } + + if (mask & AT_UID) + get->va_uid = vap->va_uid; + if (mask & AT_GID) + get->va_gid = vap->va_gid; + if (mask & AT_ATIME) + get->va_atime = vap->va_atime; + if (mask & AT_MTIME) + get->va_mtime = vap->va_mtime; + if (mask & (AT_UID | AT_GID | AT_MODE | AT_MTIME)) + gethrestime(&get->va_ctime); + + if (mask & AT_SIZE) { + if (vp->v_type == VDIR) { + error = EISDIR; + goto out; + } + /* Don't support large files. */ + if (vap->va_size > MAXOFF_T) { + error = EFBIG; + goto out; + } + if (error = xmem_xaccess(xp, VWRITE, cred)) + goto out; + mutex_exit(&xp->xn_tlock); + + rw_enter(&xp->xn_rwlock, RW_WRITER); + rw_enter(&xp->xn_contents, RW_WRITER); + error = xmemnode_trunc(xm, xp, vap->va_size); + rw_exit(&xp->xn_contents); + rw_exit(&xp->xn_rwlock); + goto out1; + } +out: + mutex_exit(&xp->xn_tlock); +out1: + return (error); +} + +/* ARGSUSED2 */ +static int +xmem_access(struct vnode *vp, int mode, int flags, struct cred *cred) +{ + struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); + int error; + + mutex_enter(&xp->xn_tlock); + error = xmem_xaccess(xp, mode, cred); + mutex_exit(&xp->xn_tlock); + return (error); +} + +/* ARGSUSED3 */ +static int +xmem_lookup(struct vnode *dvp, char *nm, struct vnode **vpp, + struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred) +{ + struct xmemnode *xp = (struct xmemnode *)VTOXN(dvp); + struct xmemnode *nxp = NULL; + int error; + + /* + * Null component name is a synonym for directory being searched. 
+ */ + if (*nm == '\0') { + VN_HOLD(dvp); + *vpp = dvp; + return (0); + } + ASSERT(xp); + + error = xdirlookup(xp, nm, &nxp, cred); + + if (error == 0) { + ASSERT(nxp); + *vpp = XNTOV(nxp); + /* + * If vnode is a device return special vnode instead + */ + if (IS_DEVVP(*vpp)) { + struct vnode *newvp; + + newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, + cred); + VN_RELE(*vpp); + *vpp = newvp; + } + } + return (error); +} + +/*ARGSUSED7*/ +static int +xmem_create(struct vnode *dvp, char *nm, struct vattr *vap, + enum vcexcl exclusive, int mode, struct vnode **vpp, struct cred *cred, + int flag) +{ + struct xmemnode *parent; + struct xmount *xm; + struct xmemnode *self; + int error; + struct xmemnode *oldxp; + +again: + parent = (struct xmemnode *)VTOXN(dvp); + xm = (struct xmount *)VTOXM(dvp); + self = NULL; + error = 0; + oldxp = NULL; + + if (vap->va_type == VREG && (vap->va_mode & VSVTX)) { + /* Must be privileged to set sticky bit */ + if (secpolicy_vnode_stky_modify(cred) != 0) + vap->va_mode &= ~VSVTX; + } else if (vap->va_type == VNON) { + return (EINVAL); + } + + /* + * Null component name is a synonym for directory being searched. 
+ */ + if (*nm == '\0') { + VN_HOLD(dvp); + oldxp = parent; + } else { + error = xdirlookup(parent, nm, &oldxp, cred); + } + + if (error == 0) { /* name found */ + ASSERT(oldxp); + + rw_enter(&oldxp->xn_rwlock, RW_WRITER); + + /* + * if create/read-only an existing + * directory, allow it + */ + if (exclusive == EXCL) + error = EEXIST; + else if ((oldxp->xn_type == VDIR) && (mode & VWRITE)) + error = EISDIR; + else { + error = xmem_xaccess(oldxp, mode, cred); + } + + if (error) { + rw_exit(&oldxp->xn_rwlock); + xmemnode_rele(oldxp); + return (error); + } + *vpp = XNTOV(oldxp); + if ((*vpp)->v_type == VREG && (vap->va_mask & AT_SIZE) && + vap->va_size == 0) { + rw_enter(&oldxp->xn_contents, RW_WRITER); + (void) xmemnode_trunc(xm, oldxp, 0); + rw_exit(&oldxp->xn_contents); + } + rw_exit(&oldxp->xn_rwlock); + if (IS_DEVVP(*vpp)) { + struct vnode *newvp; + + newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, + cred); + VN_RELE(*vpp); + *vpp = newvp; + } + return (0); + } + + if (error != ENOENT) + return (error); + + rw_enter(&parent->xn_rwlock, RW_WRITER); + error = xdirenter(xm, parent, nm, DE_CREATE, + (struct xmemnode *)NULL, (struct xmemnode *)NULL, + vap, &self, cred); + rw_exit(&parent->xn_rwlock); + + if (error) { + if (self) + xmemnode_rele(self); + + if (error == EEXIST) { + /* + * This means that the file was created sometime + * after we checked and did not find it and when + * we went to create it. + * Since creat() is supposed to truncate a file + * that already exits go back to the begining + * of the function. 
This time we will find it + * and go down the xmem_trunc() path + */ + goto again; + } + return (error); + } + + *vpp = XNTOV(self); + + if (IS_DEVVP(*vpp)) { + struct vnode *newvp; + + newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, + cred); + VN_RELE(*vpp); + *vpp = newvp; + } + + return (0); +} + +static int +xmem_remove(struct vnode *dvp, char *nm, struct cred *cred) +{ + struct xmemnode *parent = (struct xmemnode *)VTOXN(dvp); + int error; + struct xmemnode *xp = NULL; + + error = xdirlookup(parent, nm, &xp, cred); + if (error) + return (error); + + ASSERT(xp); + rw_enter(&parent->xn_rwlock, RW_WRITER); + rw_enter(&xp->xn_rwlock, RW_WRITER); + + if (xp->xn_type != VDIR || + (error = secpolicy_fs_linkdir(cred, dvp->v_vfsp)) == 0) + error = xdirdelete(parent, xp, nm, DR_REMOVE, cred); + + rw_exit(&xp->xn_rwlock); + rw_exit(&parent->xn_rwlock); + xmemnode_rele(xp); + + return (error); +} + +static int +xmem_link(struct vnode *dvp, struct vnode *srcvp, char *tnm, struct cred *cred) +{ + struct xmemnode *parent; + struct xmemnode *from; + struct xmount *xm = (struct xmount *)VTOXM(dvp); + int error; + struct xmemnode *found = NULL; + struct vnode *realvp; + + if (VOP_REALVP(srcvp, &realvp) == 0) + srcvp = realvp; + + parent = (struct xmemnode *)VTOXN(dvp); + from = (struct xmemnode *)VTOXN(srcvp); + + if ((srcvp->v_type == VDIR && + secpolicy_fs_linkdir(cred, dvp->v_vfsp) != 0) || + (from->xn_uid != crgetuid(cred) && secpolicy_basic_link(cred) != 0)) + return (EPERM); + + error = xdirlookup(parent, tnm, &found, cred); + if (error == 0) { + ASSERT(found); + xmemnode_rele(found); + return (EEXIST); + } + + if (error != ENOENT) + return (error); + + rw_enter(&parent->xn_rwlock, RW_WRITER); + error = xdirenter(xm, parent, tnm, DE_LINK, (struct xmemnode *)NULL, + from, NULL, (struct xmemnode **)NULL, cred); + rw_exit(&parent->xn_rwlock); + return (error); +} + +static int +xmem_rename( + struct vnode *odvp, /* source parent vnode */ + char *onm, /* source name */ + 
struct vnode *ndvp, /* destination parent vnode */ + char *nnm, /* destination name */ + struct cred *cred) +{ + struct xmemnode *fromparent; + struct xmemnode *toparent; + struct xmemnode *fromxp = NULL; /* source xmemnode */ + struct xmount *xm = (struct xmount *)VTOXM(odvp); + int error; + int samedir = 0; /* set if odvp == ndvp */ + struct vnode *realvp; + + if (VOP_REALVP(ndvp, &realvp) == 0) + ndvp = realvp; + + fromparent = (struct xmemnode *)VTOXN(odvp); + toparent = (struct xmemnode *)VTOXN(ndvp); + + mutex_enter(&xm->xm_renamelck); + + /* + * Look up xmemnode of file we're supposed to rename. + */ + error = xdirlookup(fromparent, onm, &fromxp, cred); + if (error) { + mutex_exit(&xm->xm_renamelck); + return (error); + } + + /* + * Make sure we can delete the old (source) entry. This + * requires write permission on the containing directory. If + * that directory is "sticky" it further requires (except for + * for privileged users) that the user own the directory or + * the source entry, or else have permission to write the + * source entry. + */ + if (((error = xmem_xaccess(fromparent, VWRITE, cred)) != 0) || + (error = xmem_sticky_remove_access(fromparent, fromxp, cred)) != 0) + goto done; + + /* + * Check for renaming to or from '.' or '..' or that + * fromxp == fromparent + */ + if ((onm[0] == '.' && + (onm[1] == '\0' || (onm[1] == '.' && onm[2] == '\0'))) || + (nnm[0] == '.' && + (nnm[1] == '\0' || (nnm[1] == '.' && nnm[2] == '\0'))) || + (fromparent == fromxp)) { + error = EINVAL; + goto done; + } + + samedir = (fromparent == toparent); + /* + * Make sure we can search and rename into the new + * (destination) directory. 
+ */ + if (!samedir) { + error = xmem_xaccess(toparent, VEXEC|VWRITE, cred); + if (error) + goto done; + } + + /* + * Link source to new target + */ + rw_enter(&toparent->xn_rwlock, RW_WRITER); + error = xdirenter(xm, toparent, nnm, DE_RENAME, + fromparent, fromxp, (struct vattr *)NULL, + (struct xmemnode **)NULL, cred); + rw_exit(&toparent->xn_rwlock); + + if (error) { + /* + * ESAME isn't really an error; it indicates that the + * operation should not be done because the source and target + * are the same file, but that no error should be reported. + */ + if (error == ESAME) + error = 0; + goto done; + } + + /* + * Unlink from source. + */ + rw_enter(&fromparent->xn_rwlock, RW_WRITER); + rw_enter(&fromxp->xn_rwlock, RW_WRITER); + + error = xdirdelete(fromparent, fromxp, onm, DR_RENAME, cred); + + /* + * The following handles the case where our source xmemnode was + * removed before we got to it. + * + * XXX We should also cleanup properly in the case where xdirdelete + * fails for some other reason. Currently this case shouldn't happen. + * (see 1184991). + */ + if (error == ENOENT) + error = 0; + + rw_exit(&fromxp->xn_rwlock); + rw_exit(&fromparent->xn_rwlock); +done: + xmemnode_rele(fromxp); + mutex_exit(&xm->xm_renamelck); + + return (error); +} + +static int +xmem_mkdir(struct vnode *dvp, char *nm, struct vattr *va, struct vnode **vpp, + struct cred *cred) +{ + struct xmemnode *parent = (struct xmemnode *)VTOXN(dvp); + struct xmemnode *self = NULL; + struct xmount *xm = (struct xmount *)VTOXM(dvp); + int error; + + /* + * Might be dangling directory. Catch it here, + * because a ENOENT return from xdirlookup() is + * an "o.k. return". 
+ */ + if (parent->xn_nlink == 0) + return (ENOENT); + + error = xdirlookup(parent, nm, &self, cred); + if (error == 0) { + ASSERT(self); + xmemnode_rele(self); + return (EEXIST); + } + if (error != ENOENT) + return (error); + + rw_enter(&parent->xn_rwlock, RW_WRITER); + error = xdirenter(xm, parent, nm, DE_MKDIR, + (struct xmemnode *)NULL, (struct xmemnode *)NULL, va, + &self, cred); + if (error) { + rw_exit(&parent->xn_rwlock); + if (self) + xmemnode_rele(self); + return (error); + } + rw_exit(&parent->xn_rwlock); + *vpp = XNTOV(self); + return (0); +} + +static int +xmem_rmdir(struct vnode *dvp, char *nm, struct vnode *cdir, struct cred *cred) +{ + struct xmemnode *parent = (struct xmemnode *)VTOXN(dvp); + struct xmemnode *self = NULL; + struct vnode *vp; + int error = 0; + + /* + * Return error when removing . and .. + */ + if (strcmp(nm, ".") == 0) + return (EINVAL); + if (strcmp(nm, "..") == 0) + return (EEXIST); /* Should be ENOTEMPTY */ + error = xdirlookup(parent, nm, &self, cred); + if (error) + return (error); + + rw_enter(&parent->xn_rwlock, RW_WRITER); + rw_enter(&self->xn_rwlock, RW_WRITER); + + vp = XNTOV(self); + if (vp == dvp || vp == cdir) { + error = EINVAL; + goto done1; + } + if (self->xn_type != VDIR) { + error = ENOTDIR; + goto done1; + } + + mutex_enter(&self->xn_tlock); + if (self->xn_nlink > 2) { + mutex_exit(&self->xn_tlock); + error = EEXIST; + goto done1; + } + mutex_exit(&self->xn_tlock); + + if (vn_vfswlock(vp)) { + error = EBUSY; + goto done1; + } + if (vn_mountedvfs(vp) != NULL) { + error = EBUSY; + goto done; + } + + /* + * Check for an empty directory + * i.e. only includes entries for "." and ".." 
+ */ + if (self->xn_dirents > 2) { + error = EEXIST; /* SIGH should be ENOTEMPTY */ + /* + * Update atime because checking xn_dirents is logically + * equivalent to reading the directory + */ + gethrestime(&self->xn_atime); + goto done; + } + + error = xdirdelete(parent, self, nm, DR_RMDIR, cred); +done: + vn_vfsunlock(vp); +done1: + rw_exit(&self->xn_rwlock); + rw_exit(&parent->xn_rwlock); + xmemnode_rele(self); + + return (error); +} + +/* ARGSUSED2 */ + +static int +xmem_readdir(struct vnode *vp, struct uio *uiop, struct cred *cred, int *eofp) +{ + struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); + struct xdirent *xdp; + int error; + register struct dirent64 *dp; + register ulong_t offset; + register ulong_t total_bytes_wanted; + register long outcount = 0; + register long bufsize; + int reclen; + caddr_t outbuf; + + if (uiop->uio_loffset >= MAXOFF_T) { + if (eofp) + *eofp = 1; + return (0); + } + /* + * assuming system call has already called xmem_rwlock + */ + ASSERT(RW_READ_HELD(&xp->xn_rwlock)); + + if (uiop->uio_iovcnt != 1) + return (EINVAL); + + if (vp->v_type != VDIR) + return (ENOTDIR); + + /* + * There's a window here where someone could have removed + * all the entries in the directory after we put a hold on the + * vnode but before we grabbed the rwlock. Just return unless + * there are still references to the current file in which case panic. 
+ */ + if (xp->xn_dir == NULL) { + if (xp->xn_nlink) + cmn_err(CE_PANIC, "empty directory 0x%p", (void *)xp); + return (0); + } + + /* + * Get space for multiple directory entries + */ + total_bytes_wanted = uiop->uio_iov->iov_len; + bufsize = total_bytes_wanted + sizeof (struct dirent64); + outbuf = kmem_alloc(bufsize, KM_SLEEP); + + dp = (struct dirent64 *)outbuf; + + + offset = 0; + xdp = xp->xn_dir; + while (xdp) { + offset = xdp->xd_offset; + if (offset >= uiop->uio_offset) { + reclen = (int)DIRENT64_RECLEN(strlen(xdp->xd_name)); + if (outcount + reclen > total_bytes_wanted) + break; + ASSERT(xdp->xd_xmemnode != NULL); + + /* use strncpy(9f) to zero out uninitialized bytes */ + + ASSERT(strlen(xdp->xd_name) + 1 <= + DIRENT64_NAMELEN(reclen)); + (void) strncpy(dp->d_name, xdp->xd_name, + DIRENT64_NAMELEN(reclen)); + dp->d_reclen = (ushort_t)reclen; + dp->d_ino = (ino64_t)xdp->xd_xmemnode->xn_nodeid; + dp->d_off = (offset_t)xdp->xd_offset + 1; + dp = (struct dirent64 *) + ((uintptr_t)dp + dp->d_reclen); + outcount += reclen; + ASSERT(outcount <= bufsize); + } + xdp = xdp->xd_next; + } + error = uiomove(outbuf, outcount, UIO_READ, uiop); + if (!error) { + /* If we reached the end of the list our offset */ + /* should now be just past the end. 
*/ + if (!xdp) { + offset += 1; + if (eofp) + *eofp = 1; + } else if (eofp) + *eofp = 0; + uiop->uio_offset = offset; + } + gethrestime(&xp->xn_atime); + kmem_free(outbuf, bufsize); + return (error); +} + +static int +xmem_symlink(struct vnode *dvp, char *lnm, struct vattr *tva, char *tnm, + struct cred *cred) +{ + struct xmemnode *parent = (struct xmemnode *)VTOXN(dvp); + struct xmemnode *self = (struct xmemnode *)NULL; + struct xmount *xm = (struct xmount *)VTOXM(dvp); + char *cp = NULL; + int error; + size_t len; + + error = xdirlookup(parent, lnm, &self, cred); + if (error == 0) { + /* + * The entry already exists + */ + xmemnode_rele(self); + return (EEXIST); /* was 0 */ + } + + if (error != ENOENT) { + if (self != NULL) + xmemnode_rele(self); + return (error); + } + + rw_enter(&parent->xn_rwlock, RW_WRITER); + error = xdirenter(xm, parent, lnm, DE_CREATE, (struct xmemnode *)NULL, + (struct xmemnode *)NULL, tva, &self, cred); + rw_exit(&parent->xn_rwlock); + + if (error) { + if (self) + xmemnode_rele(self); + return (error); + } + len = strlen(tnm) + 1; + cp = xmem_memalloc(len, 0); + if (cp == NULL) { + xmemnode_rele(self); + return (ENOSPC); + } + (void) strcpy(cp, tnm); + + self->xn_symlink = cp; + self->xn_size = len - 1; + xmemnode_rele(self); + return (error); +} + +/* ARGSUSED2 */ +static int +xmem_readlink(struct vnode *vp, struct uio *uiop, struct cred *cred) +{ + struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); + int error = 0; + + if (vp->v_type != VLNK) + return (EINVAL); + + rw_enter(&xp->xn_rwlock, RW_READER); + rw_enter(&xp->xn_contents, RW_READER); + error = uiomove(xp->xn_symlink, xp->xn_size, UIO_READ, uiop); + gethrestime(&xp->xn_atime); + rw_exit(&xp->xn_contents); + rw_exit(&xp->xn_rwlock); + return (error); +} + +/* ARGSUSED */ +static int +xmem_fsync(struct vnode *vp, int syncflag, struct cred *cred) +{ + return (0); +} + +/* ARGSUSED */ +static void +xmem_inactive(struct vnode *vp, struct cred *cred) +{ + struct xmemnode *xp = 
(struct xmemnode *)VTOXN(vp); + struct xmount *xm = (struct xmount *)VFSTOXM(vp->v_vfsp); + + rw_enter(&xp->xn_rwlock, RW_WRITER); +top: + mutex_enter(&xp->xn_tlock); + mutex_enter(&vp->v_lock); + ASSERT(vp->v_count >= 1); + + /* + * If we don't have the last hold or the link count is non-zero, + * there's little to do -- just drop our hold. + */ + if (vp->v_count > 1 || xp->xn_nlink != 0) { + vp->v_count--; + mutex_exit(&vp->v_lock); + mutex_exit(&xp->xn_tlock); + rw_exit(&xp->xn_rwlock); + return; + } + + /* + * We have the last hold *and* the link count is zero, so this + * xmemnode is dead from the filesystem's viewpoint. However, + * if the xmemnode has any pages associated with it (i.e. if it's + * a normal file with non-zero size), the xmemnode can still be + * discovered by pageout or fsflush via the page vnode pointers. + * In this case we must drop all our locks, truncate the xmemnode, + * and try the whole dance again. + */ + if (xp->xn_size != 0) { + if (xp->xn_type == VREG) { + mutex_exit(&vp->v_lock); + mutex_exit(&xp->xn_tlock); + rw_enter(&xp->xn_contents, RW_WRITER); + (void) xmemnode_trunc(xm, xp, 0); + rw_exit(&xp->xn_contents); + ASSERT(xp->xn_size == 0); + ASSERT(xp->xn_nblocks == 0); + goto top; + } + if (xp->xn_type == VLNK) + xmem_memfree(xp->xn_symlink, xp->xn_size + 1); + } + + mutex_exit(&vp->v_lock); + mutex_exit(&xp->xn_tlock); + mutex_enter(&xm->xm_contents); + if (xp->xn_forw == NULL) + xm->xm_rootnode->xn_back = xp->xn_back; + else + xp->xn_forw->xn_back = xp->xn_back; + xp->xn_back->xn_forw = xp->xn_forw; + mutex_exit(&xm->xm_contents); + rw_exit(&xp->xn_rwlock); + rw_destroy(&xp->xn_rwlock); + mutex_destroy(&xp->xn_tlock); + vn_free(xp->xn_vnode); + xmem_memfree(xp, sizeof (struct xmemnode)); +} + +static int +xmem_fid(struct vnode *vp, struct fid *fidp) +{ + struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); + struct xfid *xfid; + + if (fidp->fid_len < (sizeof (struct xfid) - sizeof (ushort_t))) { + fidp->fid_len = sizeof (struct 
xfid) - sizeof (ushort_t); + return (ENOSPC); + } + + xfid = (struct xfid *)fidp; + bzero(xfid, sizeof (struct xfid)); + xfid->xfid_len = (int)sizeof (struct xfid) - sizeof (ushort_t); + + xfid->xfid_ino = xp->xn_nodeid; + xfid->xfid_gen = xp->xn_gen; + + return (0); +} + + +/* + * Return all the pages from [off..off+len] in given file + */ +static int +xmem_getpage(struct vnode *vp, offset_t off, size_t len, uint_t *protp, + page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, + enum seg_rw rw, struct cred *cr) +{ + int err = 0; + struct xmemnode *xp = VTOXN(vp); + struct xmount *xm = (struct xmount *)VTOXM(vp); + timestruc_t now; + + cmn_err(CE_PANIC, "xmem_getpage"); + rw_enter(&xp->xn_contents, RW_READER); + + if (off + len > xp->xn_size + xm->xm_bsize) { + rw_exit(&xp->xn_contents); + return (EFAULT); + } + rw_exit(&xp->xn_contents); + + if (len <= xm->xm_bsize) + err = xmem_getapage(vp, (u_offset_t)off, len, protp, pl, plsz, + seg, addr, rw, cr); + else + err = pvn_getpages(xmem_getapage, vp, (u_offset_t)off, len, + protp, pl, plsz, seg, addr, rw, cr); + + rw_enter(&xp->xn_contents, RW_WRITER); + gethrestime(&now); + xp->xn_atime = now; + if (rw == S_WRITE) + xp->xn_mtime = now; + rw_exit(&xp->xn_contents); + + return (err); +} + +/* + * Called from pvn_getpages to get a particular page. + */ +/*ARGSUSED*/ +static int +xmem_getapage(struct vnode *vp, u_offset_t off, size_t len, uint_t *protp, + page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, + enum seg_rw rw, struct cred *cr) +{ + cmn_err(CE_PANIC, "xmem_getapage"); + return (0); +} + +/* ARGSUSED */ +int +xmem_putpage(struct vnode *vp, offset_t off, size_t len, int flags, + struct cred *cr) +{ + return (0); +} + +#ifndef lint +/* + * Write out a single page. + * For xmemfs this means choose a physical swap slot and write the page + * out using VOP_PAGEIO. 
For performance, we attempt to kluster; i.e., + * we try to find a bunch of other dirty pages adjacent in the file + * and a bunch of contiguous swap slots, and then write all the pages + * out in a single i/o. + */ +/*ARGSUSED*/ +static int +xmem_putapage(struct vnode *vp, page_t *pp, u_offset_t *offp, + size_t *lenp, int flags, struct cred *cr) +{ + cmn_err(CE_PANIC, "xmem putapage"); + return (1); +} +#endif + + +static int +xmem_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp, + size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, + struct cred *cred) +{ + struct seg *seg; + struct segxmem_crargs xmem_a; + struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); + struct xmount *xm = (struct xmount *)VTOXM(vp); + uint_t blocknumber; + int error; + +#ifdef lint + maxprot = maxprot; +#endif + if (vp->v_flag & VNOMAP) + return (ENOSYS); + + if (off < 0) + return (EINVAL); + + /* offset, length and address has to all be block aligned */ + + if (off & (xm->xm_bsize - 1) || len & (xm->xm_bsize - 1) || + ((ulong_t)*addrp) & (xm->xm_bsize - 1)) { + + return (EINVAL); + } + + if (vp->v_type != VREG) + return (ENODEV); + + if (flags & MAP_PRIVATE) + return (EINVAL); /* XXX need to be handled */ + + /* + * Don't allow mapping to locked file + */ + if (vn_has_mandatory_locks(vp, xp->xn_mode)) { + return (EAGAIN); + } + + if (error = xmem_fillpages(xp, vp, off, len, 1)) { + return (error); + } + + blocknumber = off >> xm->xm_bshift; + + if (flags & MAP_FIXED) { + /* + * User specified address - blow away any previous mappings + */ + AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); + seg = as_findseg(as, *addrp, 0); + + /* + * Fast path. segxmem_remap will fail if this is the wrong + * segment or if the len is beyond end of seg. If it fails, + * we do the regular stuff thru as_* routines. 
+ */ + + if (seg && (segxmem_remap(seg, vp, *addrp, len, + &xp->xn_ppa[blocknumber], prot) == 0)) { + AS_LOCK_EXIT(as, &as->a_lock); + return (0); + } + AS_LOCK_EXIT(as, &as->a_lock); + if (seg) + (void) as_unmap(as, *addrp, len); + + as_rangelock(as); + + error = valid_usr_range(*addrp, len, prot, as, as->a_userlimit); + + if (error != RANGE_OKAY || + as_gap(as, len, addrp, &len, AH_CONTAIN, *addrp)) { + as_rangeunlock(as); + return (EINVAL); + } + + } else { + as_rangelock(as); + map_addr(addrp, len, (offset_t)off, 1, flags); + } + + if (*addrp == NULL) { + as_rangeunlock(as); + return (ENOMEM); + } + + xmem_a.xma_vp = vp; + xmem_a.xma_offset = (u_offset_t)off; + xmem_a.xma_prot = prot; + xmem_a.xma_cred = cred; + xmem_a.xma_ppa = &xp->xn_ppa[blocknumber]; + xmem_a.xma_bshift = xm->xm_bshift; + + error = as_map(as, *addrp, len, segxmem_create, &xmem_a); + + as_rangeunlock(as); + return (error); +} + +/* ARGSUSED */ +static int +xmem_addmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr, + size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, + struct cred *cred) +{ + return (0); +} + +/* ARGSUSED */ +static int +xmem_delmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr, + size_t len, uint_t prot, uint_t maxprot, uint_t flags, + struct cred *cred) +{ + return (0); +} + +static int +xmem_freesp(struct vnode *vp, struct flock64 *lp, int flag) +{ + register int i; + register struct xmemnode *xp = VTOXN(vp); + int error; + + ASSERT(vp->v_type == VREG); + ASSERT(lp->l_start >= 0); + + if (lp->l_len != 0) + return (EINVAL); + + rw_enter(&xp->xn_rwlock, RW_WRITER); + if (xp->xn_size == lp->l_start) { + rw_exit(&xp->xn_rwlock); + return (0); + } + + /* + * Check for any mandatory locks on the range + */ + if (MANDLOCK(vp, xp->xn_mode)) { + long save_start; + + save_start = lp->l_start; + + if (xp->xn_size < lp->l_start) { + /* + * "Truncate up" case: need to make sure there + * is no lock beyond current end-of-file. 
To + * do so, we need to set l_start to the size + * of the file temporarily. + */ + lp->l_start = xp->xn_size; + } + lp->l_type = F_WRLCK; + lp->l_sysid = 0; + lp->l_pid = ttoproc(curthread)->p_pid; + i = (flag & (FNDELAY|FNONBLOCK)) ? 0 : SLPFLCK; + if ((i = reclock(vp, lp, i, 0, lp->l_start, NULL)) != 0 || + lp->l_type != F_UNLCK) { + rw_exit(&xp->xn_rwlock); + return (i ? i : EAGAIN); + } + + lp->l_start = save_start; + } + + rw_enter(&xp->xn_contents, RW_WRITER); + error = xmemnode_trunc((struct xmount *)VFSTOXM(vp->v_vfsp), + xp, lp->l_start); + rw_exit(&xp->xn_contents); + rw_exit(&xp->xn_rwlock); + return (error); +} + +/* ARGSUSED */ +static int +xmem_space(struct vnode *vp, int cmd, struct flock64 *bfp, int flag, + offset_t offset, struct cred *cred, caller_context_t *ct) +{ + int error; + + if (cmd != F_FREESP) + return (EINVAL); + if ((error = convoff(vp, bfp, 0, (offset_t)offset)) == 0) { + if ((bfp->l_start > MAXOFF_T) || (bfp->l_len > MAXOFF_T)) + return (EFBIG); + error = xmem_freesp(vp, bfp, flag); + } + return (error); +} + +/* ARGSUSED */ +static int +xmem_seek(struct vnode *vp, offset_t ooff, offset_t *noffp) +{ + return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? 
EINVAL : 0); +} + +/* ARGSUSED2 */ +static int +xmem_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp) +{ + struct xmemnode *xp = VTOXN(vp); + + if (write_lock) { + rw_enter(&xp->xn_rwlock, RW_WRITER); + } else { + rw_enter(&xp->xn_rwlock, RW_READER); + } + return (write_lock); +} + +/* ARGSUSED1 */ +static void +xmem_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp) +{ + struct xmemnode *xp = VTOXN(vp); + + rw_exit(&xp->xn_rwlock); +} + +struct vnodeops *xmem_vnodeops; + +const fs_operation_def_t xmem_vnodeops_template[] = { + VOPNAME_OPEN, xmem_open, + VOPNAME_CLOSE, xmem_close, + VOPNAME_READ, xmem_read, + VOPNAME_WRITE, xmem_write, + VOPNAME_IOCTL, xmem_ioctl, + VOPNAME_GETATTR, xmem_getattr, + VOPNAME_SETATTR, xmem_setattr, + VOPNAME_ACCESS, xmem_access, + VOPNAME_LOOKUP, xmem_lookup, + VOPNAME_CREATE, xmem_create, + VOPNAME_REMOVE, xmem_remove, + VOPNAME_LINK, xmem_link, + VOPNAME_RENAME, xmem_rename, + VOPNAME_MKDIR, xmem_mkdir, + VOPNAME_RMDIR, xmem_rmdir, + VOPNAME_READDIR, xmem_readdir, + VOPNAME_SYMLINK, xmem_symlink, + VOPNAME_READLINK, xmem_readlink, + VOPNAME_FSYNC, xmem_fsync, + VOPNAME_INACTIVE, (fs_generic_func_p) xmem_inactive, + VOPNAME_FID, xmem_fid, + VOPNAME_RWLOCK, xmem_rwlock, + VOPNAME_RWUNLOCK, (fs_generic_func_p) xmem_rwunlock, + VOPNAME_SEEK, xmem_seek, + VOPNAME_SPACE, xmem_space, + VOPNAME_GETPAGE, xmem_getpage, + VOPNAME_PUTPAGE, xmem_putpage, + VOPNAME_MAP, (fs_generic_func_p) xmem_map, + VOPNAME_ADDMAP, (fs_generic_func_p) xmem_addmap, + VOPNAME_DELMAP, xmem_delmap, + NULL, NULL +};
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deleted_files/usr/src/uts/intel/sys/fs/seg_xmem.h Thu Jan 18 16:23:02 2007 -0800
@@ -0,0 +1,108 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_FS_SEG_XMEM_H
+#define _SYS_FS_SEG_XMEM_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/map.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Statistics for segxmem operations, kept as named kstat counters.
+ *
+ * No explicit locking to protect these stats.
+ */
+struct segxmemcnt {
+	kstat_named_t	sx_fault;	/* number of segxmem_faults */
+	kstat_named_t	sx_getmapflt;	/* number of segxmem_getmaps */
+	kstat_named_t	sx_release;	/* releases (original comment cut off) */
+	kstat_named_t	sx_pagecreate;	/* pagecreates */
+};
+
+
+#if defined(_KERNEL)
+
+struct segxmem_crargs {
+	struct vnode	*xma_vp;	/* vnode mapped from */
+	u_offset_t	xma_offset;	/* starting offset for mapping */
+	/* offset above could be invalid due to remaps, but ppa will be valid */
+	page_t		***xma_ppa;	/* page list for this mapping */
+	uint_t		xma_bshift;	/* for converting offset to block # */
+	struct cred	*xma_cred;	/* credentials */
+	uchar_t		xma_prot;	/* presumably PROT_* bits -- confirm */
+};
+
+
+struct sx_prot {
+	struct sx_prot	*spc_next;	/* Next such one */
+	pgcnt_t		spc_pageindex;	/* First page with changed prot */
+	pgcnt_t		spc_numpages;	/* & number of such pages */
+	uchar_t		spc_prot;	/* presumably prot of that range */
+};
+
+struct segxmem_data {
+	struct vnode	*sxd_vp;	/* vnode for this mapping */
+	offset_t	sxd_offset;	/* & initial offset */
+	/*
+	 * The above may not be valid after remap, but ppa below will track
+	 * the remaps. NOTE(review): no ppa member is declared in this struct.
+	 */
+	size_t		sxd_bsize;	/* block size */
+	uint_t		sxd_bshift;	/* for converting offset to block # */
+	size_t		sxd_softlockcnt; /* presumably softlock count; confirm */
+	struct sx_prot	*sxd_spc;	/* linked list of changed protections */
+	uchar_t		sxd_prot;	/* presumably PROT_* bits -- confirm */
+};
+
+#define sx_blocks(seg, sxd) howmany((seg)->s_size, 1 << (sxd)->sxd_bshift)
+
+/*
+ * Public seg_xmem segment operations.
+ */
+extern int segxmem_create(struct seg *, struct segxmem_crargs *);
+/*
+ * extern faultcode_t segxmem_fault(struct hat *, struct seg *, caddr_t, size_t,
+ *	enum fault_type, enum seg_rw);
+ */
+extern caddr_t segxmem_getmap(struct map *, struct vnode *, u_offset_t,
+	size_t, page_t **, enum seg_rw);
+extern void segxmem_release(struct map *, caddr_t, size_t);
+extern int segxmem_remap(struct seg *, struct vnode *vp, caddr_t, size_t,
+	page_t ***, uchar_t);
+extern void segxmem_inval(struct seg *, struct vnode *, u_offset_t);
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_FS_SEG_XMEM_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deleted_files/usr/src/uts/intel/sys/fs/xmem.h Thu Jan 18 16:23:02 2007 -0800
@@ -0,0 +1,295 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_FS_XMEM_H
+#define _SYS_FS_XMEM_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/t_lock.h>
+#include <vm/seg.h>
+#include <vm/seg_vn.h>
+#include <sys/fs/seg_xmem.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+/*
+ * xmemnode is the file system dependent node for xmemfs.
+ *
+ *	xn_rwlock protects access of the directory list at xn_dir
+ *	as well as synchronizing read and writes to the xmemnode
+ *
+ *	xn_contents protects growing, shrinking, reading and writing
+ *	the file along with xn_rwlock (see below).
+ *
+ *	xn_tlock protects updates to xn_mode and xn_nlink
+ *
+ *	xm_contents in the xmount filesystem data structure protects
+ *	xn_forw and xn_back which are used to maintain a linked
+ *	list of all xmemfs files associated with that file system
+ *
+ *	XXX - valid ? The pp array represents the store for xmemfs.
+ *	To grow or shrink the file or fill in holes requires
+ *	manipulation of the pp array. These operations are protected
+ *	by a combination of xn_rwlock and xn_contents. Growing or shrinking
+ *	the array requires the write lock on xn_rwlock and xn_contents.
+ *	Filling in a slot in the array requires the write lock on xn_contents.
+ *	Reading the array requires the read lock on xn_contents.
+ *
+ * The ordering of the locking is:
+ *	xn_rwlock -> xn_contents -> page locks on pages in file
+ *
+ * xn_tlock doesn't require any xmemnode locks
+ */
+
+struct xmemnode {
+	struct xmemnode	*xn_back;		/* linked list of xmemnodes */
+	struct xmemnode	*xn_forw;		/* linked list of xmemnodes */
+	union {
+		struct {
+			struct xdirent	*un_dirlist; /* dirent list */
+			uint_t	un_dirents;	/* number of dirents */
+		} un_dirstruct;
+		char		*un_symlink;	/* pointer to symlink */
+		struct {
+			page_t	***un_ppa;	/* page backing for file */
+			size_t	un_size;	/* size repres. by array */
+		} un_ppstruct;
+	} un_xmemnode;
+	struct vnode	*xn_vnode;		/* vnode for this xmemnode */
+	int		xn_gen;			/* pseudo gen number for xfid */
+	struct vattr	xn_attr;		/* attributes */
+	krwlock_t	xn_contents;		/* vm side -serialize mods */
+	krwlock_t	xn_rwlock;		/* rw,trunc size - serialize */
+						/* mods and directory updates */
+	kmutex_t	xn_tlock;		/* time, flag, and nlink lock */
+};
+
+/*
+ * each xn_ppa[] entry points to an array of page_t pointers.
+ */
+#define xn_ppa		un_xmemnode.un_ppstruct.un_ppa
+#define xn_ppasz	un_xmemnode.un_ppstruct.un_size
+#define xn_dir		un_xmemnode.un_dirstruct.un_dirlist
+#define xn_dirents	un_xmemnode.un_dirstruct.un_dirents
+#define xn_symlink	un_xmemnode.un_symlink
+
+/*
+ * Attributes
+ */
+#define xn_mask		xn_attr.va_mask
+#define xn_type		xn_attr.va_type
+#define xn_mode		xn_attr.va_mode
+#define xn_uid		xn_attr.va_uid
+#define xn_gid		xn_attr.va_gid
+#define xn_fsid		xn_attr.va_fsid
+#define xn_nodeid	xn_attr.va_nodeid
+#define xn_nlink	xn_attr.va_nlink
+#define xn_size		xn_attr.va_size
+#define xn_atime	xn_attr.va_atime
+#define xn_mtime	xn_attr.va_mtime
+#define xn_ctime	xn_attr.va_ctime
+#define xn_rdev		xn_attr.va_rdev
+#define xn_blksize	xn_attr.va_blksize
+#define xn_nblocks	xn_attr.va_nblocks
+#define xn_seq		xn_attr.va_seq
+
+/*
+ * xmemfs directories are made up of a linked list of xdirent structures
+ * hanging off directory xmemnodes. File names are not fixed length,
+ * but are null terminated.
+ */
+struct xdirent {
+	struct xmemnode	*xd_xmemnode;		/* xmemnode for this file */
+	struct xdirent	*xd_next;		/* next directory entry */
+	struct xdirent	*xd_prev;		/* prev directory entry */
+	uint_t		xd_offset;		/* "offset" of dir entry */
+	uint_t		xd_hash;		/* a hash of xd_name */
+	struct xdirent	*xd_link;		/* linked via the hash table */
+	struct xmemnode	*xd_parent;		/* parent, dir we are in */
+	char		*xd_name;		/* must be null terminated */
+						/* max length is MAXNAMELEN */
+};
+
+/*
+ * xfid overlays the fid structure (for VFS_VGET)
+ */
+struct xfid {
+	uint16_t	xfid_len;
+	ino32_t		xfid_ino;
+	int32_t		xfid_gen;
+};
+
+#define ESAME	(-1)		/* trying to rename linked files (special) */
+
+extern struct vnodeops *xmem_vnodeops;
+extern const struct fs_operation_def xmem_vnodeops_template[];
+
+/*
+ * xmemfs per-mount data structure.
+ *
+ * All fields are protected by xm_contents.
+ * File renames on a particular file system are protected by xm_renamelck.
+ */
+struct xmount {
+	struct vfs	*xm_vfsp;	/* filesystem's vfs struct */
+	struct xmemnode	*xm_rootnode;	/* root xmemnode */
+	char		*xm_mntpath;	/* name of xmemfs mount point */
+	uint_t		xm_flags;	/* Miscellaneous Flags */
+	size_t		xm_bsize;	/* block size for this file system */
+	uint_t		xm_bshift;	/* for converting offset to block # */
+	pgcnt_t		xm_ppb;		/* pages per block */
+	struct map	*xm_map;	/* Map for kernel addresses */
+	caddr_t		xm_mapaddr;	/* Base of above map */
+	size_t		xm_mapsize;	/* size of above map */
+	caddr_t		xm_vmmapaddr;	/* Base of heap for above map */
+	size_t		xm_vmmapsize;	/* size of heap for above map */
+	ulong_t		xm_max;		/* file system max reservation */
+	pgcnt_t		xm_mem;		/* pages of reserved memory */
+	dev_t		xm_dev;		/* unique dev # of mounted `device' */
+	uint_t		xm_gen;		/* pseudo generation number for files */
+	kmutex_t	xm_contents;	/* lock for xmount structure */
+	kmutex_t	xm_renamelck;	/* rename lock for this mount */
+	uint_t		xm_xpgcnt;	/* index and count for xpg_array */
+	void		**xm_xpgarray;	/* array of pointers */
+};
+
+#ifndef DEBUG
+#define XMEMPRINTF(level, args)
+#else
+extern int xmemlevel;
+/*PRINTFLIKE1*/
+extern void xmemprintf(const char *, ...)
+	__KPRINTFLIKE(1);
+/*
+ * Debug trace: calls xmemprintf with the parenthesized argument list
+ * "args" when level >= xmemlevel. The do/while (0) wrapper makes the
+ * expansion one statement, so it is safe in an unbraced if/else.
+ */
+#define XMEMPRINTF(level, args)	do {				\
+	if ((level) >= xmemlevel)				\
+		xmemprintf args;				\
+} while (0)
+#endif
+
+#endif /* _KERNEL */
+
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack(4)
+#endif
+
+/*
+ * Make sizeof struct xmemfs_args the same on x86 and amd64.
+ */
+
+struct xmemfs_args {
+	offset_t	xa_fssize;	/* file system size in bytes */
+	offset_t	xa_bsize;	/* block size for this file system */
+	uint_t		xa_flags;	/* flags for this mount */
+};
+
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack()
+#endif
+
+/* Flag bits */
+#define XARGS_RESERVEMEM	1	/* pre reserve memory */
+#define XARGS_LARGEPAGES	2	/* Use large pages */
+
+#ifdef _KERNEL
+
+/*
+ * File system independent to xmemfs conversion macros
+ */
+#define VFSTOXM(vfsp)		((struct xmount *)(vfsp)->vfs_data)
+#define VTOXM(vp)		((struct xmount *)(vp)->v_vfsp->vfs_data)
+#define VTOXN(vp)		((struct xmemnode *)(vp)->v_data)
+#define XNTOV(xp)		((xp)->xn_vnode)
+#define xmemnode_hold(tp)	VN_HOLD(XNTOV(tp))
+#define xmemnode_rele(tp)	VN_RELE(XNTOV(tp))
+
+/*
+ * enums
+ */
+enum de_op	{ DE_CREATE, DE_MKDIR, DE_LINK, DE_RENAME }; /* direnter ops */
+enum dr_op	{ DR_REMOVE, DR_RMDIR, DR_RENAME }; /* dirremove ops */
+
+/*
+ * xmemfs_minfree is the amount (in pages) of memory that xmemfs
+ * leaves free for the rest of the system.
+ * NB: If xmemfs allocates too much space, other processes will be
+ * unable to execute. 320 is chosen arbitrarily to be about right for
+ * an RDBMS environment with all of its buffers coming from xmemfs.
+ * The expansion is parenthesized so that it stays a single operand in
+ * expressions such as x / XMEMMINFREE.
+ * NOTE(review): the value is a byte count while xmemfs_minfree is
+ * documented in pages; presumably it is converted where xmemfs_minfree
+ * is initialized -- confirm.
+ */
+#define XMEMMINFREE	(320 * 1024 * 1024)	/* 320 Megabytes */
+/*
+ * number of simultaneous reads/writes is limited by NUM_SIMULMAPS
+ * below. We cannot set it much higher as we expect typical block
+ * size to be 2MB or 4MB and we cannot afford to reserve and keep
+ * too much kernel virtual memory for ourselves.
+ */
+#define SEGXMEM_NUM_SIMULMAPS	4
+
+extern pgcnt_t xmemfs_minfree;	/* memory in pages */
+
+extern void xmemnode_init(struct xmount *, struct xmemnode *,
+	struct vattr *, struct cred *);
+extern int xmemnode_trunc(struct xmount *, struct xmemnode *, u_offset_t);
+extern int xdirlookup(struct xmemnode *, char *, struct xmemnode **,
+	struct cred *);
+extern int xdirdelete(struct xmemnode *, struct xmemnode *, char *,
+	enum dr_op, struct cred *);
+extern void xdirinit(struct xmemnode *, struct xmemnode *);
+extern void xdirtrunc(struct xmemnode *);
+extern void *xmem_memalloc(size_t, int);
+extern void xmem_memfree(void *, size_t);
+extern int xmem_xaccess(void *, int, struct cred *);
+extern int xdirenter(struct xmount *, struct xmemnode *, char *,
+	enum de_op, struct xmemnode *, struct xmemnode *, struct vattr *,
+	struct xmemnode **, struct cred *);
+extern int xmem_fillpages(struct xmemnode *, struct vnode *, offset_t,
+	offset_t, int);
+extern int xmem_sticky_remove_access(struct xmemnode *, struct xmemnode *,
+	struct cred *);
+
+#endif /* _KERNEL */
+
+#define XMEM_MUSTHAVE	1
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_FS_XMEM_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deleted_files/usr/src/uts/intel/xmemfs/Makefile Thu Jan 18 16:23:02 2007 -0800 @@ -0,0 +1,94 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# uts/intel/xmemfs/Makefile +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" +# +# This makefile drives the production of the xmemfs file system +# kernel module. +# +# x86 architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = xmemfs +OBJECTS = $(XMEMFS_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(XMEMFS_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(USR_FS_DIR)/$(MODULE) + +INC_PATH += -I../../i86pc + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +# +# For now, disable these lint checks; maintainers should endeavor +# to investigate and remove these for maximum lint coverage. +# Please do not carry these forward to new Makefiles. 
+# +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN +LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW +LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ
--- a/usr/src/Makefile.lint Thu Jan 18 14:25:26 2007 -0800 +++ b/usr/src/Makefile.lint Thu Jan 18 16:23:02 2007 -0800 @@ -421,7 +421,6 @@ i386_SUBDIRS= \ cmd/biosdev \ - cmd/fs.d/xmemfs \ cmd/rtc \ lib/brand/lx \ lib/cfgadm_plugins/sata
--- a/usr/src/cmd/fs.d/Makefile Thu Jan 18 14:25:26 2007 -0800 +++ b/usr/src/cmd/fs.d/Makefile Thu Jan 18 16:23:02 2007 -0800 @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -46,11 +46,8 @@ SUBDIR1= lofs zfs SUBDIR2= dev fd pcfs nfs hsfs proc ctfs udfs ufs tmpfs cachefs autofs mntfs objfs -i386_SUBDIRS= xmemfs -i386_I18NDIRS= xmemfs -SUBDIRS= $(SUBDIR1) $(SUBDIR2) $($(MACH)_SUBDIRS) -ALL_SUBDIRS= $(SUBDIR1) $(SUBDIR2) $(i386_SUBDIRS) -I18NDIRS= $(SUBDIR2) $(i386_I18NDIRS) +SUBDIRS= $(SUBDIR1) $(SUBDIR2) +I18NDIRS= $(SUBDIR2) CLOBBERFILES += $(POFILES_XPG4) @@ -195,11 +192,11 @@ $(ROOTUSRSBINFF): $(ROOTUSRSBIN)/ff -$(RM) $@; $(SYMLINK) ./ff $@ -clean: $(ALL_SUBDIRS) .WAIT clean_local +clean: $(SUBDIRS) .WAIT clean_local clean_local: -clobber: $(ALL_SUBDIRS) .WAIT clobber_local +clobber: $(SUBDIRS) .WAIT clobber_local clobber_local: clean_local $(RM) $(PROG) $(ROOTFS_PROG) $(SPPROG) $(MNTTAB) $(DEFAULTFILES) \ @@ -207,7 +204,7 @@ lint: -$(ALL_SUBDIRS): FRC +$(SUBDIRS): FRC @cd $@; pwd; $(MAKE) $(MFLAGS) $(TARGET) FRC:
--- a/usr/src/cmd/fs.d/xmemfs/Makefile Thu Jan 18 14:25:26 2007 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,37 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -#ident "%Z%%M% %I% %E% SMI" -# -# Copyright 2003 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# - -FSTYPE= xmemfs -LIBPROG= mount - -include ../Makefile.fstype -include ../Makefile.mount - -CPPFLAGS += -I../../../uts/intel -I../../../uts/i86pc - -include ../Makefile.mount.targ
--- a/usr/src/cmd/fs.d/xmemfs/mount.c Thu Jan 18 14:25:26 2007 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,330 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <stdio.h> -#include <signal.h> -#include <string.h> -#include <unistd.h> -#include <errno.h> -#include <sys/mntent.h> -#include <sys/mnttab.h> -#include <sys/mntent.h> -#include <sys/mount.h> -#include <sys/fs/xmem.h> -#include <sys/types.h> -#include <locale.h> -#include <sys/stat.h> -#include <sys/statvfs.h> -#include <fslib.h> -#include <stdlib.h> - -enum { - FSSIZE, - VERBOSE, - LARGEBSIZE, -#ifdef DEBUG - NOLARGEBSIZE, - BSIZE, - RESERVEMEM, - NORESERVEMEM, -#endif - XOPTSZ -}; - -static char *myopts[] = { - "size", /* required */ - "vb", - "largebsize", -#ifdef DEBUG - "nolargebsize", /* default */ - "bsize", /* internal use only */ - "reservemem", /* default */ - "noreservemem", -#endif - NULL -}; - -static offset_t -atosz(char *optarg) -{ - offset_t off; - char *endptr; - - off = strtoll(optarg, &endptr, 0); - - switch (*endptr) { - case 't': case 'T': - off *= 1024; - /* FALLTHROUGH */ - case 'g': case 'G': - off *= 1024; - /* FALLTHROUGH */ - case 'm': case 'M': - off *= 1024; - /* FALLTHROUGH */ - case 'k': case 'K': - off *= 1024; - /* FALLTHROUGH */ - default: - break; - } - return (off); -} - - -int -main(int argc, char *argv[]) -{ - struct mnttab mnt; - int c; - char *myname; - char optbuf[MAX_MNTOPT_STR]; - char typename[64]; - char *options, *value; - int error = 0; - int verbose = 0; - int nmflg = 0; - offset_t fssize = 0; - offset_t bsize = 0; - int optsize = sizeof (struct xmemfs_args); - int mflg = 0; - int optcnt = 0; - int qflg = 0; - char *saveopt; - struct xmemfs_args xargs = { - 0, /* xa_fssize - file system sz */ - 0, /* xa_bsize - blk sz */ - XARGS_RESERVEMEM /* xa_flags */ - }; - - (void) setlocale(LC_ALL, ""); - -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - (void) textdomain(TEXT_DOMAIN); - - myname = strrchr(argv[0], '/'); - myname = myname ? 
myname + 1 : argv[0]; - (void) snprintf(typename, sizeof (typename), "%s_%s", MNTTYPE_XMEMFS, - myname); - argv[0] = typename; - - /* RO xmemfs not supported... */ - (void) strlcpy(optbuf, "rw", sizeof (optbuf)); - - while ((c = getopt(argc, argv, "Vqo:mO")) != EOF) { - switch (c) { - case 'q': - qflg++; - break; - case 'V': - verbose++; - break; - case 'm': - nmflg++; - mflg |= MS_NOMNTTAB; - break; - case 'O': - mflg |= MS_OVERLAY; - break; - case 'o': - options = optarg; - while (*options != '\0') { - saveopt = options; - - switch (getsubopt(&options, myopts, &value)) { - case LARGEBSIZE: - xargs.xa_flags |= XARGS_LARGEPAGES; - break; - case FSSIZE: - if (value) { - fssize = atosz(value); - if (!fssize) { - (void) fprintf(stderr, -gettext("%s: value %s for option \"%s\" is invalid\n"), -typename, value, myopts[FSSIZE]); - error++; - break; - } - xargs.xa_fssize = fssize; - optcnt++; - if (verbose) - (void) fprintf(stderr, -gettext("setting fssize to %d\n"), fssize); - } else { - (void) fprintf(stderr, -gettext("%s: option \"%s\" requires value\n"), typename, myopts[FSSIZE]); - error++; - } - break; -#ifdef DEBUG - case RESERVEMEM: - xargs.xa_flags |= XARGS_RESERVEMEM; - break; - case NORESERVEMEM: - xargs.xa_flags &= ~XARGS_RESERVEMEM; - break; - case NOLARGEBSIZE: - xargs.xa_flags &= ~XARGS_LARGEPAGES; - break; - case BSIZE: /* file system block size */ - if (value) { - bsize = atosz(value); - if (!bsize) { - (void) fprintf(stderr, -gettext("%s: value %s for option \"%s\" is invalid\n"), -typename, value, myopts[FSSIZE]); - error++; - break; - } - xargs.xa_bsize = bsize; - optcnt++; - if (verbose) - (void) fprintf(stderr, -gettext("setting bsize to %d\n"), bsize); - } else { - (void) fprintf(stderr, -gettext("%s: option \"%s\" requires value\n"), typename, myopts[BSIZE]); - error++; - } - break; -#endif - - case VERBOSE: - verbose++; - break; - default: - if (fsisstdopt(saveopt)) { - (void) strlcat(optbuf, ",", - sizeof (optbuf)); - (void) strlcat(optbuf, - 
saveopt, sizeof (optbuf)); - break; - } - if (!qflg) { - (void) fprintf(stderr, gettext( - "%s: WARNING: ignoring " - "option \"%s\"\n"), - typename, saveopt); - } - - break; - } - } - if (bsize) { - (void) snprintf(optbuf, sizeof (optbuf), - "%s,bsize=%lld", optbuf, bsize); - if (--optcnt) - (void) strlcat(optbuf, ",", - sizeof (optbuf)); - if (verbose) - (void) fprintf(stderr, "optbuf:%s\n", - optbuf); - } - if (fssize) { - (void) snprintf(optbuf, sizeof (optbuf), - "%s,size=%lld", optbuf, fssize); - if (--optcnt) - (void) strlcat(optbuf, ",", - sizeof (optbuf)); - if (verbose) - (void) fprintf(stderr, "optbuf:%s\n", - optbuf); - } else { - error++; - } - if (options[0] && !error) { - (void) strlcat(optbuf, options, - sizeof (optbuf)); - if (verbose) - (void) fprintf(stderr, "optbuf:%s\n", - optbuf); - } - if (verbose) - (void) fprintf(stderr, "optsize:%d optbuf:%s\n", - optsize, optbuf); - break; - default: - error++; - break; - } - } - - if (verbose && !error) { - char *optptr; - - (void) fprintf(stderr, "%s", typename); - for (optcnt = 1; optcnt < argc; optcnt++) { - optptr = argv[optcnt]; - if (optptr) - (void) fprintf(stderr, " %s", optptr); - } - (void) fprintf(stderr, "\n"); - } - - if (argc - optind != 2 || error) { - (void) fprintf(stderr, - gettext("Usage: %s -o[largebsize,]size=sz" - " xmem mount_point\n"), typename); - exit(1); - } - - mnt.mnt_special = argv[optind++]; - mnt.mnt_mountp = argv[optind++]; - mnt.mnt_fstype = MNTTYPE_XMEMFS; - mflg |= MS_DATA | MS_OPTIONSTR; - mnt.mnt_mntopts = optbuf; - - saveopt = strdup(optbuf); - - if (verbose) { - (void) fprintf(stderr, "mount(%s, \"%s\", %d, %s", - mnt.mnt_special, mnt.mnt_mountp, mflg, MNTTYPE_XMEMFS); - if (optsize) - (void) fprintf(stderr, ", \"%s\", %d)\n", - optbuf, strlen(optbuf)); - else - (void) fprintf(stderr, ")\n"); - } - if (mount(mnt.mnt_special, mnt.mnt_mountp, mflg, MNTTYPE_XMEMFS, - &xargs, optsize, optbuf, MAX_MNTOPT_STR)) { - if (errno == EBUSY) - (void) fprintf(stderr, - 
gettext("mount: %s already mounted\n"), - mnt.mnt_mountp); - else - perror("mount"); - exit(1); - } - - if (!qflg && saveopt != NULL) - cmp_requested_to_actual_options(saveopt, optbuf, - mnt.mnt_special, mnt.mnt_mountp); - - return (0); -}
--- a/usr/src/cmd/zoneadm/zoneadm.c Thu Jan 18 14:25:26 2007 -0800 +++ b/usr/src/cmd/zoneadm/zoneadm.c Thu Jan 18 16:23:02 2007 -0800 @@ -926,8 +926,7 @@ rpath); return (Z_ERR); } - if ((strcmp(stbuf.st_fstype, MNTTYPE_TMPFS) == 0) || - (strcmp(stbuf.st_fstype, MNTTYPE_XMEMFS) == 0)) { + if (strcmp(stbuf.st_fstype, MNTTYPE_TMPFS) == 0) { (void) printf(gettext("WARNING: %s is on a temporary " "file system.\n"), rpath); }
--- a/usr/src/pkgdefs/SUNWcsu/prototype_i386 Thu Jan 18 14:25:26 2007 -0800 +++ b/usr/src/pkgdefs/SUNWcsu/prototype_i386 Thu Jan 18 16:23:02 2007 -0800 @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -69,7 +69,6 @@ f none usr/kernel/exec/javaexec 755 root sys f none usr/kernel/fs/fdfs 755 root sys f none usr/kernel/fs/pcfs 755 root sys -f none usr/kernel/fs/xmemfs 755 root sys f none usr/kernel/sched/FX 755 root sys f none usr/kernel/sched/FX_DPTBL 755 root sys f none usr/kernel/sched/IA 755 root sys @@ -82,8 +81,6 @@ f none usr/kernel/sys/exacctsys 755 root sys f none usr/kernel/sys/sysacct 755 root sys f none usr/lib/devfsadm/linkmod/SUNW_misc_link_i386.so 755 root sys -d none usr/lib/fs/xmemfs 755 root sys -f none usr/lib/fs/xmemfs/mount 555 root bin s none usr/sbin/installgrub=../../sbin/installgrub f none usr/sbin/rtc 555 root bin d none usr/sbin/i86 755 root bin @@ -126,7 +123,6 @@ d none usr/kernel/fs/amd64 755 root sys f none usr/kernel/fs/amd64/fdfs 755 root sys f none usr/kernel/fs/amd64/pcfs 755 root sys -f none usr/kernel/fs/amd64/xmemfs 755 root sys d none usr/kernel/pcbe/amd64 755 root sys d none usr/kernel/sched/amd64 755 root sys f none usr/kernel/sched/amd64/FX 755 root sys
--- a/usr/src/pkgdefs/SUNWhea/prototype_i386 Thu Jan 18 14:25:26 2007 -0800 +++ b/usr/src/pkgdefs/SUNWhea/prototype_i386 Thu Jan 18 16:23:02 2007 -0800 @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -83,8 +83,6 @@ f none usr/include/sys/dktp/fdisk.h 644 root bin f none usr/include/sys/dma_engine.h 644 root bin f none usr/include/sys/fp.h 644 root bin -f none usr/include/sys/fs/seg_xmem.h 644 root bin -f none usr/include/sys/fs/xmem.h 644 root bin d none usr/include/sys/i2o 755 root bin f none usr/include/sys/i2o/i2omsg.h 644 root bin f none usr/include/sys/i2o/i2outil.h 644 root bin
--- a/usr/src/tools/scripts/bfu.sh Thu Jan 18 14:25:26 2007 -0800 +++ b/usr/src/tools/scripts/bfu.sh Thu Jan 18 16:23:02 2007 -0800 @@ -6184,6 +6184,15 @@ # Remove audit_record_attr. Moved to /usr/lib/security rm -f $root/etc/security/audit_record_attr + # + # Remove xmemfs altogether. + # + rm -f $usr/include/sys/fs/xmem.h + rm -f $usr/include/sys/fs/seg_xmem.h + rm -f $usr/kernel/fs/xmemfs + rm -f $usr/kernel/fs/amd64/xmemfs + rm -rf $usr/lib/fs/xmemfs + # End of pre-archive extraction hacks. if [ $diskless = no -a $zone = global ]; then
--- a/usr/src/uts/common/sys/mntent.h Thu Jan 18 14:25:26 2007 -0800 +++ b/usr/src/uts/common/sys/mntent.h Thu Jan 18 16:23:02 2007 -0800 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * * Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T @@ -54,7 +54,6 @@ #define MNTTYPE_TMPFS "tmpfs" /* Tmp volatile file system */ #define MNTTYPE_AUTOFS "autofs" /* Automounter ``file'' system */ #define MNTTYPE_MNTFS "mntfs" /* In-kernel mnttab */ -#define MNTTYPE_XMEMFS "xmemfs" /* Extended memory FS, IA32 only */ #define MNTTYPE_DEV "dev" /* /dev file system */ #define MNTTYPE_CTFS "ctfs" /* Contract file system */ #define MNTTYPE_OBJFS "objfs" /* Kernel object file system */
--- a/usr/src/uts/intel/Makefile.files Thu Jan 18 14:25:26 2007 -0800 +++ b/usr/src/uts/intel/Makefile.files Thu Jan 18 16:23:02 2007 -0800 @@ -22,7 +22,7 @@ # # uts/intel/Makefile.files # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -101,13 +101,6 @@ CORE_OBJS += \ prmachdep.o -XMEMFS_OBJS += \ - seg_xmem.o \ - xmem_dir.o \ - xmem_subr.o \ - xmem_vfsops.o \ - xmem_vnops.o - LX_PROC_OBJS += \ lx_prsubr.o \ lx_prvfsops.o \
--- a/usr/src/uts/intel/Makefile.intel.shared Thu Jan 18 14:25:26 2007 -0800 +++ b/usr/src/uts/intel/Makefile.intel.shared Thu Jan 18 16:23:02 2007 -0800 @@ -431,7 +431,7 @@ # FS_KMODS += autofs cachefs ctfs dev devfs fdfs fifofs hsfs lofs FS_KMODS += lx_afs lx_proc mntfs namefs nfs objfs zfs -FS_KMODS += pcfs procfs sockfs specfs tmpfs udfs ufs xmemfs +FS_KMODS += pcfs procfs sockfs specfs tmpfs udfs ufs # # Streams Modules (/kernel/strmod):
--- a/usr/src/uts/intel/Makefile.rules Thu Jan 18 14:25:26 2007 -0800 +++ b/usr/src/uts/intel/Makefile.rules Thu Jan 18 16:23:02 2007 -0800 @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -60,10 +60,6 @@ $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) -$(OBJS_DIR)/%.o: $(UTSBASE)/intel/fs/xmemfs/%.c - $(COMPILE.c) -o $@ $< - $(CTFCONVERT_O) - $(OBJS_DIR)/%.o: $(UTSBASE)/intel/ia32/ml/%.s $(COMPILE.s) -o $@ $< @@ -160,9 +156,6 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/intel/fs/proc/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) -$(LINTS_DIR)/%.ln: $(UTSBASE)/intel/fs/xmemfs/%.c - @($(LHEAD) $(LINT.c) $< $(LTAIL)) - $(LINTS_DIR)/%.ln: $(UTSBASE)/intel/ia32/ml/%.s @($(LHEAD) $(LINT.s) $< $(LTAIL))
--- a/usr/src/uts/intel/fs/xmemfs/seg_xmem.c Thu Jan 18 14:25:26 2007 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,823 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ -/* All Rights Reserved */ - -/* - * Portions of this source code were derived from Berkeley 4.3 BSD - * under license from the Regents of the University of California. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * The segxmem driver is used by the xmemfs to get faster (than seg_map) - * mappings [lower routine overhead] to random vnode/offsets. - * Mappings are made to a very limited kernel address range and to a - * potentially much larger user address range. It is the speed of mmap - * and munmaps to the user address space that we are concerned with. 
- * We also need to ensure very low overhead for I/O similar to seg_spt - */ - -#include <sys/types.h> -#include <sys/t_lock.h> -#include <sys/param.h> -#include <sys/sysmacros.h> -#include <sys/buf.h> -#include <sys/systm.h> -#include <sys/vnode.h> -#include <sys/mman.h> -#include <sys/errno.h> -#include <sys/cred.h> -#include <sys/kmem.h> -#include <sys/vtrace.h> -#include <sys/cmn_err.h> -#include <sys/debug.h> -#include <sys/thread.h> -#include <sys/dumphdr.h> -#include <sys/map.h> -#include <sys/atomic.h> - -#include <vm/seg_kmem.h> -#include <vm/seg_vn.h> -#include <vm/hat.h> -#include <vm/as.h> -#include <vm/seg.h> -#include <vm/page.h> -#include <vm/pvn.h> -#include <vm/rm.h> -#include <sys/vfs.h> -#include <sys/fs/seg_xmem.h> -#include <sys/fs/xmem.h> -#include <sys/lgrp.h> - -/* - * Private seg op routines. - */ -static void segxmem_free(struct seg *seg); -static int segxmem_dup(struct seg *seg, struct seg *newseg); -static int segxmem_unmap(struct seg *seg, caddr_t raddr, size_t ssize); -static faultcode_t segxmem_fault(struct hat *hat, struct seg *seg, caddr_t addr, - size_t len, enum fault_type type, enum seg_rw rw); -static int segxmem_setprot(struct seg *seg, caddr_t addr, size_t len, - uint_t prot); -static int segxmem_checkprot(struct seg *seg, caddr_t addr, size_t len, - uint_t prot); -static size_t segxmem_incore(struct seg *seg, caddr_t addr, size_t len, - register char *vec); -static int segxmem_sync(struct seg *seg, register caddr_t addr, size_t len, - int attr, uint_t flags); -static int segxmem_lockop(struct seg *seg, caddr_t addr, size_t len, - int attr, int op, ulong_t *lockmap, size_t pos); -static int segxmem_getprot(struct seg *seg, caddr_t addr, size_t len, - uint_t *protv); -static u_offset_t segxmem_getoffset(struct seg *seg, caddr_t addr); -static int segxmem_gettype(struct seg *seg, caddr_t addr); -static int segxmem_getvp(struct seg *, caddr_t, struct vnode **); -static int segxmem_advise(struct seg *seg, caddr_t addr, size_t len, 
- uint_t behav); -static void segxmem_dump(struct seg *seg); -static int segxmem_pagelock(struct seg *seg, caddr_t addr, size_t len, - struct page ***ppp, enum lock_type type, - enum seg_rw rw); -static int segxmem_setpgsz(struct seg *, caddr_t, size_t, uint_t); -static int segxmem_getmemid(struct seg *, caddr_t, memid_t *); - -#define SEGXMEM_NULLOP(t) (t(*)())NULL - -static struct seg_ops segxmem_ops = { - segxmem_dup, /* dup */ - segxmem_unmap, - segxmem_free, - segxmem_fault, /* Change if HAT_DYNAMIC_ISM_UNMAP suported */ - SEGXMEM_NULLOP(int), /* faulta */ - segxmem_setprot, - segxmem_checkprot, - SEGXMEM_NULLOP(int), /* kluster */ - SEGXMEM_NULLOP(size_t), /* swapout */ - segxmem_sync, /* sync */ - segxmem_incore, /* incore */ - segxmem_lockop, /* lockop */ - segxmem_getprot, - segxmem_getoffset, - segxmem_gettype, - segxmem_getvp, - segxmem_advise, /* advise */ - segxmem_dump, - segxmem_pagelock, /* pagelock */ - segxmem_setpgsz, - segxmem_getmemid, /* getmemid */ - SEGXMEM_NULLOP(lgrp_mem_policy_info_t *), /* getpolicy */ -}; - - -/* - * Statistics for segxmem operations. - * - * No explicit locking to protect these stats. 
- */ -struct segxmemcnt segxmemcnt = { - { "fault", KSTAT_DATA_ULONG }, - { "getmap", KSTAT_DATA_ULONG }, - { "pagecreate", KSTAT_DATA_ULONG } -}; - -kstat_named_t *segxmemcnt_ptr = (kstat_named_t *)&segxmemcnt; -uint_t segxmemcnt_ndata = sizeof (segxmemcnt) / sizeof (kstat_named_t); - - -int segxmem_DR = -1; /* Indicate if hat supports DR */ - -int remap_broken = 0; - - -int -segxmem_create(struct seg *seg, struct segxmem_crargs *xmem_a) -{ - struct segxmem_data *sxd; - uint_t prot; - caddr_t taddr; - uint_t blocknumber, lastblock; - page_t ***ppa; - struct hat *hat; - size_t tlen; - - ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock)); - - if (((uintptr_t)seg->s_base | seg->s_size) & PAGEOFFSET) - panic("segxmem not PAGESIZE aligned"); - - sxd = kmem_zalloc(sizeof (struct segxmem_data), KM_SLEEP); - - seg->s_data = (void *)sxd; - seg->s_ops = &segxmem_ops; - - sxd->sxd_prot = xmem_a->xma_prot; - sxd->sxd_vp = xmem_a->xma_vp; - sxd->sxd_offset = xmem_a->xma_offset; - sxd->sxd_bshift = xmem_a->xma_bshift; - sxd->sxd_bsize = 1 << xmem_a->xma_bshift; - - blocknumber = 0; - lastblock = (seg->s_size - 1) >> sxd->sxd_bshift; - taddr = seg->s_base; - tlen = sxd->sxd_bsize; - ppa = xmem_a->xma_ppa; - hat = seg->s_as->a_hat; - prot = xmem_a->xma_prot; - while (blocknumber <= lastblock) { - page_t **ppp; - - if (VTOXM(sxd->sxd_vp)->xm_ppb == 1) - ppp = (page_t **)ppa; - else - ppp = *ppa; - - hat_memload_array(hat, taddr, tlen, ppp, prot | HAT_NOSYNC, - HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST); - - blocknumber++; - ppa++; - taddr += tlen; - } - - return (0); -} - -static void -segxmem_free(seg) - struct seg *seg; -{ - struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; - ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock)); - kmem_free(sxd, sizeof (struct segxmem_data)); - -} - -static int -segxmem_dup(struct seg *seg, struct seg *newseg) -{ - struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; - struct segxmem_data *newsxd; - caddr_t 
vaddr; - ulong_t pfn; - page_t *pp, **ppa; - int i; - int ppb; - - newsxd = kmem_zalloc(sizeof (struct segxmem_data), KM_SLEEP); - - newsxd->sxd_vp = sxd->sxd_vp; - newsxd->sxd_offset = sxd->sxd_offset; - newsxd->sxd_bsize = sxd->sxd_bsize; - newsxd->sxd_bshift = sxd->sxd_bshift; - newsxd->sxd_prot = sxd->sxd_prot; - - newsxd->sxd_softlockcnt = sxd->sxd_softlockcnt; - - newseg->s_ops = &segxmem_ops; - newseg->s_data = (void *)newsxd; - - ppb = btop(sxd->sxd_bsize); - if (ppb > 1) - ppa = kmem_alloc(ppb * sizeof (page_t *), KM_SLEEP); - else - ppa = &pp; - - for (vaddr = seg->s_base; vaddr < seg->s_base + seg->s_size; - vaddr += sxd->sxd_bsize) { - - /* ### sxd->sxd_vp->xn_ppa[(vaddr - s_base)]->p_pagenum */ - - pfn = hat_getpfnum(seg->s_as->a_hat, vaddr); - - if (pfn == PFN_INVALID) - continue; - - for (i = 0; i < ppb; i++) { - ppa[i] = page_numtopp_nolock(pfn); - pfn++; - } - hat_memload_array(newseg->s_as->a_hat, vaddr, sxd->sxd_bsize, - ppa, sxd->sxd_prot | HAT_NOSYNC, - HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST); - } - if (ppb > 1) - kmem_free(ppa, ppb * sizeof (page_t *)); - - return (0); -} - -/* - * This routine is called via a machine specific fault handling - * routine. It is also called by software routines wishing to - * lock or unlock a range of addresses. - */ -static faultcode_t -segxmem_fault( - struct hat *hat, - struct seg *seg, - caddr_t addr, - size_t len, - enum fault_type type, - enum seg_rw rw) -{ - struct segxmem_data *sxd; - size_t npages = btopr(len); - -#ifdef lint - hat = hat; - addr = addr; -#endif - - ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); - - sxd = (struct segxmem_data *)seg->s_data; - - ASSERT(addr >= seg->s_base); - ASSERT(((addr + len) - seg->s_base) <= seg->s_size); - - switch (type) { - - case F_SOFTLOCK: - - /* - * Because we know that every shared memory is - * already locked and called in the same context. 
- */ - atomic_add_long(&sxd->sxd_softlockcnt, npages); - return (0); - - case F_SOFTUNLOCK: - - atomic_add_long(&sxd->sxd_softlockcnt, -npages); - - /* - * Check for softlock - */ - if (sxd->sxd_softlockcnt == 0) { - /* - * All SOFTLOCKS are gone. Wakeup any waiting - * unmappers so they can try again to unmap. - * As an optimization check for waiters first - * without the mutex held, so we're not always - * grabbing it on softunlocks. - */ - if (AS_ISUNMAPWAIT(seg->s_as)) { - mutex_enter(&seg->s_as->a_contents); - if (AS_ISUNMAPWAIT(seg->s_as)) { - AS_CLRUNMAPWAIT(seg->s_as); - cv_broadcast(&seg->s_as->a_cv); - } - mutex_exit(&seg->s_as->a_contents); - } - } - return (0); - - case F_INVAL: - - if ((rw == S_EXEC) && !(sxd->sxd_prot & PROT_EXEC)) - return (FC_NOMAP); - - /* - * all xmem pages should already be mapped - desired mapping - * unknown - */ - - panic("xmem page fault"); - /*NOTREACHED*/ - - case F_PROT: - /* - * We can get away with this because ISM segments are - * always rw. Other than this unusual case, there - * should be no instances of protection violations. - */ - return (0); - - default: - XMEMPRINTF(8, ("segxmem_fault: type %x\n", type)); - return (FC_NOMAP); - } -} - -static int -segxmem_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) -{ - struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; - - ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock)); - - if (seg->s_base == addr && seg->s_size == len) { - sxd->sxd_prot = prot; - hat_chgprot(seg->s_as->a_hat, addr, len, prot); - } else { - return (IE_NOMEM); - } - return (0); -} - -/*ARGSUSED*/ -static int -segxmem_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) -{ - struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; - - ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock)); - - /* - * Need not acquire the segment lock since - * "sxd_prot" is a read-only field. - */ - return (((sxd->sxd_prot & prot) != prot) ? 
EACCES : 0); -} - -static int -segxmem_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv) -{ - struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; - size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1; - - ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); - - if (pgno != 0) { - do - protv[--pgno] = sxd->sxd_prot; - while (pgno != 0); - } - return (0); -} - -static u_offset_t -segxmem_getoffset(struct seg *seg, caddr_t addr) -{ - register struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; - - ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock)); - - return ((u_offset_t)sxd->sxd_offset + (addr - seg->s_base)); -} - -/*ARGSUSED*/ -static int -segxmem_gettype(struct seg *seg, caddr_t addr) -{ - ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock)); - - return (MAP_SHARED); -} - -/*ARGSUSED*/ -static int -segxmem_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp) -{ - register struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; - - ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock)); - - *vpp = sxd->sxd_vp; - return (0); -} - -#ifndef lint /* currently unused */ -/* - * Check to see if it makes sense to do kluster/read ahead to - * addr + delta relative to the mapping at addr. We assume here - * that delta is a signed PAGESIZE'd multiple (which can be negative). - * - * For segxmem we always "approve" of this action from our standpoint. 
- */ -/*ARGSUSED*/ -static int -segxmem_kluster(struct seg *seg, caddr_t addr, ssize_t delta) -{ - return (0); -} - -static void -segxmem_badop() -{ - panic("segxmem_badop"); - /*NOTREACHED*/ -} - -#endif - -/* - * Special public segxmem operations - */ - - -void -segxmem_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw) -{ - page_t *pp; - struct segxmem_data *sxd = (struct segxmem_data *)(seg->s_data); - struct vnode *vp = sxd->sxd_vp; - u_offset_t off = sxd->sxd_offset; - caddr_t eaddr; - - ASSERT(seg->s_as == &kas); - - panic("segxmem_pageunlock"); - - eaddr = addr + len; - addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); - - for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) { - hat_unlock(kas.a_hat, addr, PAGESIZE); - - /* - * Use page_find() instead of page_lookup() to - * find the page since we know that it has - * "exclusive" lock. - */ - pp = page_find(vp, off); - if (pp == NULL) - panic("segxmem_pageunlock"); - if (rw == S_WRITE) { - hat_setrefmod(pp); - } else if (rw != S_OTHER) { - hat_setref(pp); - } - - page_unlock(pp); - } -} - -/* - * segxmem_getmap allocates from the map an address range to map the vnode vp - * in the range <off, off + len). - * - * If pagecreate is nonzero, segxmem_getmap will create the page(s). - * calls hat_memload_array to load the translations. - * **ppa can be NULL if pagecreate is 0. 
- */ -caddr_t -segxmem_getmap(struct map *map, struct vnode *vp, u_offset_t off, size_t len, - page_t **ppa, enum seg_rw rw) -{ - caddr_t baseaddr; - uint_t attr = (rw == S_WRITE)?PROT_WRITE|PROT_READ:PROT_READ; - -#ifdef lint - vp = vp; - off = off; -#endif - - segxmemcnt.sx_getmapflt.value.ul++; - - baseaddr = (caddr_t)rmalloc_wait(map, len); - - hat_memload_array(kas.a_hat, baseaddr, len, ppa, attr | HAT_NOSYNC, - HAT_LOAD); - - return (baseaddr); -} - -void -segxmem_release(struct map *map, caddr_t addr, size_t len) -{ - - hat_unload(kas.a_hat, addr, len, HAT_UNLOAD_NOSYNC); - rmfree(map, len, (ulong_t)addr); -} - -int -segxmem_remap(struct seg *seg, struct vnode *vp, caddr_t addr, size_t len, - page_t ***ppa, uchar_t prot) -{ - struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; - uint_t blocknumber, lastblock, flags; - caddr_t taddr; - size_t tlen; - - ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); - - if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size || - (seg->s_ops != &segxmem_ops) || (sxd->sxd_vp != vp)) - return (1); /* Fail */ - - ASSERT(sxd->sxd_prot == prot); /* remove this later */ - - /* aligned addr and length */ - - blocknumber = (addr - seg->s_base) >> sxd->sxd_bshift; - lastblock = (addr + len - 1 - seg->s_base) >> sxd->sxd_bshift; - taddr = addr; - tlen = sxd->sxd_bsize; - while (blocknumber <= lastblock) { - - /* - * entire xmem segment mapped on mmap() call - if in the - * segment range(checked above), there should be a mapping - * therefore flags always HAT_LOAD_REMAP. - * - */ - if (hat_getpfnum(seg->s_as->a_hat, taddr) != PFN_INVALID) { -#ifdef DEBUG - if (remap_broken) - hat_unload(seg->s_as->a_hat, taddr, - tlen, HAT_UNLOAD); -#endif - - /* - * assume the hat would leave mapping HAT_LOAD_LOCK'ed - * on REMAP. 
- */ - flags = HAT_LOAD | HAT_LOAD_NOCONSIST | HAT_LOAD_REMAP; - } else { - XMEMPRINTF(4, - ("segxmem_remap: taddr %p pfn inv\n", - (void *)taddr)); - flags = HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST; - } - - prot |= HAT_NOSYNC; - - if (btop(sxd->sxd_bsize) == 1) - hat_memload_array(seg->s_as->a_hat, taddr, tlen, - (page_t **)ppa, prot, flags); - else - hat_memload_array(seg->s_as->a_hat, taddr, tlen, *ppa, - prot, flags); - - blocknumber++; - ppa++; - taddr += tlen; - } - return (0); -} - -/* ARGSUSED */ -static int -segxmem_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags) -{ - ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); - - return (0); -} - -/* - * segxmem pages are always "in core" since the memory is locked down. - */ -/* ARGSUSED */ -static size_t -segxmem_incore(struct seg *seg, caddr_t addr, size_t len, char *vec) -{ - - caddr_t eo_seg; - - ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); -#ifdef lint - seg = seg; -#endif - - eo_seg = addr + len; - while (addr < eo_seg) { - /* page exist, and it's locked. */ - *vec++ = (char)0x9; - addr += PAGESIZE; - } - return (len); -} - -static int segxmem_advise(struct seg *seg, caddr_t addr, size_t len, - uint_t behav) -{ -#ifdef lint - seg = seg; - addr = addr; - len = len; - behav = behav; -#endif - return (0); -} - -/* - * called from as_ctl(, MC_LOCK,) - * - */ -/* ARGSUSED */ -static int -segxmem_lockop(struct seg *seg, caddr_t addr, size_t len, int attr, - int op, ulong_t *lockmap, size_t pos) -{ - ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); - /* - * for spt, as->a_paglck is never set - * so this routine should not be called. 
- */ - return (0); -} - -static int -segxmem_unmap(struct seg *seg, caddr_t addr, size_t ssize) -{ - struct segxmem_data *sxd, *nsxd; - struct seg *nseg; - caddr_t segend, delsegend; - - XMEMPRINTF(1, ("segxmem_unmap: seg %p addr %p size %lx\n", - (void *)seg, (void *)addr, ssize)); - - ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); - - hat_unload(seg->s_as->a_hat, addr, ssize, HAT_UNLOAD_UNLOCK); - if (addr == seg->s_base && ssize == seg->s_size) { - seg_free(seg); - return (0); - } - sxd = (struct segxmem_data *)seg->s_data; - - /* partial unmap of the segment - begin, end and middle */ - - /* check for deleting at the beginning */ - - if (addr == seg->s_base) { - seg->s_base += ssize; - seg->s_size -= ssize; - return (0); - } - delsegend = addr + ssize; - segend = seg->s_base + seg->s_size; - - /* check for deleting at the end */ - if (delsegend == segend) { - seg->s_size -= ssize; - return (0); - } - - /* Now for the tough one. Make a new one at end and cut the current */ - - seg->s_size = addr - seg->s_base; /* adjust original segment */ - - nseg = seg_alloc(seg->s_as, delsegend, segend - delsegend); - if (nseg == NULL) - panic("segxmem seg_alloc"); - - nsxd = kmem_zalloc(sizeof (struct segxmem_data), KM_SLEEP); - - nsxd->sxd_vp = sxd->sxd_vp; - nsxd->sxd_offset = sxd->sxd_offset; /* unused */ - nsxd->sxd_bsize = sxd->sxd_bsize; - nsxd->sxd_bshift = sxd->sxd_bshift; - nsxd->sxd_prot = sxd->sxd_prot; - nsxd->sxd_softlockcnt = sxd->sxd_softlockcnt; /* ### */ - - nseg->s_ops = &segxmem_ops; - nseg->s_data = (void *)nsxd; - - return (0); -} - -/* - * Dump the pages belonging to this segxmem segment. 
- */ -static void -segxmem_dump(struct seg *seg) -{ - struct segxmem_data *sxd; - caddr_t addr; - int i, j; - uint_t nblocks; - pgcnt_t npages; - - sxd = (struct segxmem_data *)seg->s_data; - nblocks = howmany(seg->s_size, sxd->sxd_bsize); - npages = nblocks << (sxd->sxd_bshift - PAGESHIFT); - addr = seg->s_base; - - /* XXX figure out if we need something else here */ - for (i = 0; i < nblocks; i++) { - pfn_t pfn = hat_getpfnum(seg->s_as->a_hat, addr); - - for (j = 0; j < npages; j++) { - dump_addpage(seg->s_as, addr, pfn); - pfn++; - addr += PAGESIZE; - } - } -} -/*ARGSUSED*/ -static int -segxmem_setpgsz(struct seg *seg, caddr_t addr, size_t len, uint_t szc) -{ - return (ENOTSUP); -} - -static int -segxmem_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp) -{ - struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; - - memidp->val[0] = (uintptr_t)sxd->sxd_vp; - memidp->val[1] = sxd->sxd_offset + (uintptr_t)(addr - seg->s_base); - return (0); -} - -/*ARGSUSED*/ -static int -segxmem_pagelock(struct seg *seg, caddr_t addr, size_t len, - struct page ***ppp, enum lock_type type, enum seg_rw rw) -{ - return (ENOTSUP); -} - -#define XMEMBUFSZ 16384 -#define XMEMPAD 128 /* larger than max len xmem string */ - -char xmembuf[XMEMBUFSZ + XMEMPAD]; -uint_t xmembufi; -int xmemlevel = 4; - -void -xmemprintf(const char *fmt, ...) -{ - va_list args; - int len; - char localbuf[XMEMPAD]; - uint_t newval, oldxmembufi; - - va_start(args, fmt); - - len = snprintf(localbuf, INT_MAX, "%d: ", (int)CPU->cpu_id); - len += vsnprintf(localbuf + len, INT_MAX, fmt, args); - - ASSERT(len < XMEMPAD); - - do { - oldxmembufi = xmembufi; - newval = oldxmembufi + len; - if (newval > XMEMBUFSZ) - newval = 0; - } while (cas32(&xmembufi, oldxmembufi, newval) != oldxmembufi); - - bcopy(localbuf, xmembuf + oldxmembufi, len); - - va_end(args); -}
--- a/usr/src/uts/intel/fs/xmemfs/xmem_dir.c Thu Jan 18 14:25:26 2007 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1025 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/sysmacros.h> -#include <sys/systm.h> -#include <sys/time.h> -#include <sys/vfs.h> -#include <sys/vnode.h> -#include <sys/errno.h> -#include <sys/cmn_err.h> -#include <sys/cred.h> -#include <sys/stat.h> -#include <sys/debug.h> -#include <sys/policy.h> -#include <sys/fs/xmem.h> - -static int xdircheckpath(struct xmemnode *, struct xmemnode *, struct cred *); -static int xdirrename(struct xmemnode *, struct xmemnode *, struct xmemnode *, - char *, struct xmemnode *, struct xdirent *, struct cred *); -static void xdirfixdotdot(struct xmemnode *, struct xmemnode *, - struct xmemnode *); -static int xdirmakexnode(struct xmemnode *, struct xmount *, - struct vattr *, enum de_op, struct xmemnode **, struct cred *); -static int xdiraddentry(struct xmemnode *, struct xmemnode *, char *, - enum de_op, struct xmemnode *); - - -#define X_HASH_SIZE 8192 /* must be power of 2 */ -#define X_MUTEX_SIZE 64 - -static struct xdirent *x_hashtable[X_HASH_SIZE]; -static kmutex_t x_hashmutex[X_MUTEX_SIZE]; - -#define X_HASH_INDEX(a) ((a) & (X_HASH_SIZE-1)) -#define X_MUTEX_INDEX(a) ((a) & (X_MUTEX_SIZE-1)) - -#define XMEMFS_HASH(xp, name, hash) \ - { \ - char Xc, *Xcp; \ - hash = ((uintptr_t)(xp)) >> 8; \ - for (Xcp = (name); (Xc = *Xcp) != 0; Xcp++) \ - hash = (hash << 4) + hash + (uint_t)Xc; \ - } - -void -xmemfs_hash_init(void) -{ - int ix; - - for (ix = 0; ix < X_MUTEX_SIZE; ix++) - mutex_init(&x_hashmutex[ix], NULL, MUTEX_DEFAULT, NULL); -} - -/* - * This routine is where the rubber meets the road for identities. 
- */ -static void -xmemfs_hash_in(struct xdirent *x) -{ - uint_t hash; - struct xdirent **prevpp; - kmutex_t *t_hmtx; - - XMEMFS_HASH(x->xd_parent, x->xd_name, hash); - x->xd_hash = hash; - prevpp = &x_hashtable[X_HASH_INDEX(hash)]; - t_hmtx = &x_hashmutex[X_MUTEX_INDEX(hash)]; - mutex_enter(t_hmtx); - x->xd_link = *prevpp; - *prevpp = x; - mutex_exit(t_hmtx); -} - -/* - * Remove xdirent *t from the hash list. - */ -static void -xmemfs_hash_out(struct xdirent *x) -{ - uint_t hash; - struct xdirent **prevpp; - kmutex_t *t_hmtx; - - hash = x->xd_hash; - prevpp = &x_hashtable[X_HASH_INDEX(hash)]; - t_hmtx = &x_hashmutex[X_MUTEX_INDEX(hash)]; - mutex_enter(t_hmtx); - while (*prevpp != x) - prevpp = &(*prevpp)->xd_link; - *prevpp = x->xd_link; - mutex_exit(t_hmtx); -} - -static struct xdirent * -xmemfs_hash_lookup(char *name, struct xmemnode *parent, uint_t hold, - struct xmemnode **found) -{ - struct xdirent *l; - uint_t hash; - kmutex_t *t_hmtx; - struct xmemnode *xp; - - XMEMFS_HASH(parent, name, hash); - t_hmtx = &x_hashmutex[X_MUTEX_INDEX(hash)]; - mutex_enter(t_hmtx); - l = x_hashtable[X_HASH_INDEX(hash)]; - while (l) { - if ((l->xd_hash == hash) && - (l->xd_parent == parent) && - (strcmp(l->xd_name, name) == 0)) { - /* - * We need to make sure that the xmemnode that - * we put a hold on is the same one that we pass back. - * Hence, temporary variable xp is necessary. - * The right way to fix this would be to add the t_hmtx - * lock acquisition to callers like tdirrename, so - * that this race condition doesn't occur. But - * this "fix" is simpler, and less of a performance - * impact. - */ - xp = l->xd_xmemnode; - if (hold) { - ASSERT(xp); - xmemnode_hold(xp); - } - if (found) - *found = xp; - mutex_exit(t_hmtx); - return (l); - } else { - l = l->xd_link; - } - } - mutex_exit(t_hmtx); - return (NULL); -} - -/* - * Search directory 'parent' for entry 'name'. 
- * - * The calling thread can't hold the write version - * of the rwlock for the directory being searched - * - * 0 is returned on success and *foundxp points - * to the found xmemnode with its vnode held. - */ -int -xdirlookup( - struct xmemnode *parent, - char *name, - struct xmemnode **foundxp, - struct cred *cred) -{ - int error; - - *foundxp = NULL; - if (parent->xn_type != VDIR) - return (ENOTDIR); - - if ((error = xmem_xaccess(parent, VEXEC, cred))) - return (error); - - if (*name == '\0') { - xmemnode_hold(parent); - *foundxp = parent; - return (0); - } - - /* - * Search the directory for the matching name - * We need the lock protecting the xn_dir list - * so that it doesn't change out from underneath us. - * xmemfs_hash_lookup() will pass back the xmemnode - * with a hold on it. - */ - - if (xmemfs_hash_lookup(name, parent, 1, foundxp) != NULL) { - ASSERT(*foundxp); - return (0); - } - - return (ENOENT); -} - -/* - * Enter a directory entry for 'name' and 'xp' into directory 'dir' - * - * Returns 0 on success. - */ -int -xdirenter( - struct xmount *xm, - struct xmemnode *dir, /* target directory to make entry in */ - char *name, /* name of entry */ - enum de_op op, /* entry operation */ - struct xmemnode *fromparent, /* source directory if rename */ - struct xmemnode *xp, /* source xmemnode, if link/rename */ - struct vattr *va, - struct xmemnode **xpp, /* return xmemnode, if create/mkdir */ - struct cred *cred) -{ - struct xdirent *xdp; - struct xmemnode *found = NULL; - int error = 0; - char *s; - - /* - * xn_rwlock is held to serialize direnter and dirdeletes - */ - ASSERT(RW_WRITE_HELD(&dir->xn_rwlock)); - ASSERT(dir->xn_type == VDIR); - - /* - * Don't allow '/' characters in pathname component - * (thus in ufs_direnter()). - */ - for (s = name; *s; s++) - if (*s == '/') - return (EACCES); - - ASSERT(name[0] != '\0'); - - /* - * For link and rename lock the source entry and check the link count - * to see if it has been removed while it was unlocked. 
- */ - if (op == DE_LINK || op == DE_RENAME) { - mutex_enter(&xp->xn_tlock); - if (xp->xn_nlink == 0) { - mutex_exit(&xp->xn_tlock); - return (ENOENT); - } - - if (xp->xn_nlink == MAXLINK) { - mutex_exit(&xp->xn_tlock); - return (EMLINK); - } - xp->xn_nlink++; - mutex_exit(&xp->xn_tlock); - gethrestime(&xp->xn_ctime); - } - - /* - * This might be a "dangling detached directory". - * it could have been removed, but a reference - * to it kept in u_cwd. don't bother searching - * it, and with any luck the user will get tired - * of dealing with us and cd to some absolute - * pathway. *sigh*, thus in ufs, too. - */ - if (dir->xn_nlink == 0) { - error = ENOENT; - goto out; - } - - /* - * If this is a rename of a directory and the parent is - * different (".." must be changed), then the source - * directory must not be in the directory hierarchy - * above the target, as this would orphan everything - * below the source directory. - */ - if (op == DE_RENAME) { - if (xp == dir) { - error = EINVAL; - goto out; - } - if (xp->xn_type == VDIR) { - if ((fromparent != dir) && - (error = xdircheckpath(xp, dir, cred))) { - goto out; - } - } - } - - /* - * Search for the entry. Return "found" if it exists. - */ - xdp = xmemfs_hash_lookup(name, dir, 1, &found); - - if (xdp) { - ASSERT(found); - switch (op) { - case DE_CREATE: - case DE_MKDIR: - if (xpp) { - *xpp = found; - error = EEXIST; - } else { - xmemnode_rele(found); - } - break; - - case DE_RENAME: - error = xdirrename(fromparent, xp, - dir, name, found, xdp, cred); - xmemnode_rele(found); - break; - - case DE_LINK: - /* - * Can't link to an existing file. - */ - error = EEXIST; - xmemnode_rele(found); - break; - } - } else { - - /* - * The entry does not exist. Check write permission in - * directory to see if entry can be created. - */ - if (error = xmem_xaccess(dir, VWRITE, cred)) - goto out; - if (op == DE_CREATE || op == DE_MKDIR) { - /* - * Make new xmemnode and directory entry as required. 
- */ - error = xdirmakexnode(dir, xm, va, op, &xp, cred); - if (error) - goto out; - } - if (error = xdiraddentry(dir, xp, name, op, fromparent)) { - if (op == DE_CREATE || op == DE_MKDIR) { - /* - * Unmake the inode we just made. - */ - rw_enter(&xp->xn_rwlock, RW_WRITER); - if ((xp->xn_type) == VDIR) { - ASSERT(xdp == NULL); - /* - * cleanup allocs made by xdirinit() - */ - xdirtrunc(xp); - } - mutex_enter(&xp->xn_tlock); - xp->xn_nlink = 0; - mutex_exit(&xp->xn_tlock); - gethrestime(&xp->xn_ctime); - rw_exit(&xp->xn_rwlock); - xmemnode_rele(xp); - xp = NULL; - } - } else if (xpp) { - *xpp = xp; - } else if (op == DE_CREATE || op == DE_MKDIR) { - xmemnode_rele(xp); - } - } -out: - if (error && (op == DE_LINK || op == DE_RENAME)) { - /* - * Undo bumped link count. - */ - DECR_COUNT(&xp->xn_nlink, &xp->xn_tlock); - gethrestime(&xp->xn_ctime); - } - return (error); -} - -/* - * Delete entry xp of name "nm" from dir. - * Free dir entry space and decrement link count on xmemnode(s). - * - * Return 0 on success. - */ -int -xdirdelete( - struct xmemnode *dir, - struct xmemnode *xp, - char *nm, - enum dr_op op, - struct cred *cred) -{ - register struct xdirent *tpdp; - int error; - size_t namelen; - struct xmemnode *xptmp; - timestruc_t now; - - ASSERT(RW_WRITE_HELD(&dir->xn_rwlock)); - ASSERT(RW_WRITE_HELD(&xp->xn_rwlock)); - ASSERT(dir->xn_type == VDIR); - - ASSERT(nm[0] != '\0'); - - /* - * return error when removing . and .. - */ - if (nm[0] == '.') { - if (nm[1] == '\0') - return (EINVAL); - if (nm[1] == '.' && nm[2] == '\0') - return (EEXIST); /* thus in ufs */ - } - - if (error = xmem_xaccess(dir, VEXEC|VWRITE, cred)) - return (error); - - /* - * If the parent directory is "sticky", then the user must - * own the parent directory or the file in it, or else must - * have permission to write the file. Otherwise it may not - * be deleted (except by privileged users). Same as ufs_dirremove. 
- */ - if (error = xmem_sticky_remove_access(dir, xp, cred)) - return (error); - - if (dir->xn_dir == NULL) - return (ENOENT); - - tpdp = xmemfs_hash_lookup(nm, dir, 0, &xptmp); - if (tpdp == NULL) { - /* - * If it is gone, some other thread got here first! - * Return error ENOENT. - */ - return (ENOENT); - } - - /* - * If the xmemnode in the xdirent changed, we were probably - * the victim of a concurrent rename operation. The original - * is gone, so return that status (same as UFS). - */ - if (xp != xptmp) - return (ENOENT); - - xmemfs_hash_out(tpdp); - - /* - * Take tpdp out of the directory list. - */ - ASSERT(tpdp->xd_next != tpdp); - ASSERT(tpdp->xd_prev != tpdp); - if (tpdp->xd_prev) { - tpdp->xd_prev->xd_next = tpdp->xd_next; - } - if (tpdp->xd_next) { - tpdp->xd_next->xd_prev = tpdp->xd_prev; - } - - /* - * If the roving slot pointer happens to match tpdp, - * point it at the previous dirent. - */ - if (dir->xn_dir->xd_prev == tpdp) { - dir->xn_dir->xd_prev = tpdp->xd_prev; - } - ASSERT(tpdp->xd_next != tpdp); - ASSERT(tpdp->xd_prev != tpdp); - - /* - * tpdp points to the correct directory entry - */ - namelen = strlen(tpdp->xd_name) + 1; - - xmem_memfree(tpdp, sizeof (struct xdirent) + namelen); - dir->xn_size -= (sizeof (struct xdirent) + namelen); - dir->xn_dirents--; - - gethrestime(&now); - dir->xn_mtime = now; - dir->xn_ctime = now; - xp->xn_ctime = now; - - ASSERT(xp->xn_nlink > 0); - DECR_COUNT(&xp->xn_nlink, &xp->xn_tlock); - if (op == DR_RMDIR && xp->xn_type == VDIR) { - xdirtrunc(xp); - ASSERT(xp->xn_nlink == 0); - } - return (0); -} - -/* - * xdirinit is used internally to initialize a directory (dir) - * with '.' and '..' 
entries without checking permissions and locking - */ -void -xdirinit( - struct xmemnode *parent, /* parent of directory to initialize */ - struct xmemnode *dir) /* the new directory */ -{ - struct xdirent *dot, *dotdot; - timestruc_t now; - - ASSERT(RW_WRITE_HELD(&parent->xn_rwlock)); - ASSERT(dir->xn_type == VDIR); - - dot = xmem_memalloc(sizeof (struct xdirent) + 2, 1); - dotdot = xmem_memalloc(sizeof (struct xdirent) + 3, 1); - - /* - * Initialize the entries - */ - dot->xd_xmemnode = dir; - dot->xd_offset = 0; - dot->xd_name = (char *)dot + sizeof (struct xdirent); - dot->xd_name[0] = '.'; - dot->xd_parent = dir; - xmemfs_hash_in(dot); - - dotdot->xd_xmemnode = parent; - dotdot->xd_offset = 1; - dotdot->xd_name = (char *)dotdot + sizeof (struct xdirent); - dotdot->xd_name[0] = '.'; - dotdot->xd_name[1] = '.'; - dotdot->xd_parent = dir; - xmemfs_hash_in(dotdot); - - /* - * Initialize directory entry list. - */ - dot->xd_next = dotdot; - dot->xd_prev = dotdot; /* dot's xd_prev holds roving slot pointer */ - dotdot->xd_next = NULL; - dotdot->xd_prev = dot; - INCR_COUNT(&parent->xn_nlink, &parent->xn_tlock); - - dir->xn_dir = dot; - dir->xn_size = 2 * sizeof (struct xdirent) + 5; /* dot and dotdot */ - dir->xn_dirents = 2; - dir->xn_nlink = 2; /* one for daddy, and one just for being me */ - - gethrestime(&now); - dir->xn_mtime = now; - dir->xn_ctime = now; - parent->xn_ctime = now; -} - -/* - * xdirtrunc is called to remove all directory entries under this directory. - * The files themselves are removed elsewhere. 
- */ -void -xdirtrunc(struct xmemnode *dir) -{ - register struct xdirent *xdp; - size_t namelen; - timestruc_t now; - - ASSERT(RW_WRITE_HELD(&dir->xn_rwlock)); - ASSERT(dir->xn_type == VDIR); - - for (xdp = dir->xn_dir; xdp; xdp = dir->xn_dir) { - ASSERT(xdp->xd_next != xdp); - ASSERT(xdp->xd_prev != xdp); - ASSERT(xdp->xd_xmemnode); - ASSERT(xdp->xd_xmemnode->xn_nlink > 0); - - dir->xn_dir = xdp->xd_next; - namelen = strlen(xdp->xd_name) + 1; - - DECR_COUNT(&xdp->xd_xmemnode->xn_nlink, - &xdp->xd_xmemnode->xn_tlock); - - xmemfs_hash_out(xdp); - - xmem_memfree(xdp, sizeof (struct xdirent) + namelen); - dir->xn_size -= (sizeof (struct xdirent) + namelen); - dir->xn_dirents--; - } - - gethrestime(&now); - dir->xn_mtime = now; - dir->xn_ctime = now; - - ASSERT(dir->xn_dir == NULL); - ASSERT(dir->xn_size == 0); - ASSERT(dir->xn_dirents == 0); -} - -/* - * Check if the source directory is in the path of the target directory. - * The target directory is locked by the caller. - */ -static int -xdircheckpath( - struct xmemnode *fromxp, - struct xmemnode *toparent, - struct cred *cred) -{ - int error = 0; - struct xmemnode *dir, *dotdot; - struct xdirent *xdp; - - ASSERT(RW_WRITE_HELD(&toparent->xn_rwlock)); - - xdp = xmemfs_hash_lookup("..", toparent, 1, &dotdot); - if (xdp == NULL) - return (ENOENT); - - ASSERT(dotdot); - - if (dotdot == toparent) { - /* root of fs. search trivially satisfied. */ - xmemnode_rele(dotdot); - return (0); - } - for (;;) { - /* - * Return error for cases like "mv c c/d", - * "mv c c/d/e" and so on. - */ - if (dotdot == fromxp) { - xmemnode_rele(dotdot); - error = EINVAL; - break; - } - dir = dotdot; - error = xdirlookup(dir, "..", &dotdot, cred); - if (error) { - xmemnode_rele(dir); - break; - } - /* - * We're okay if we traverse the directory tree up to - * the root directory and don't run into the - * parent directory. 
- */ - if (dir == dotdot) { - xmemnode_rele(dir); - xmemnode_rele(dotdot); - break; - } - xmemnode_rele(dir); - } - return (error); -} - -static int -xdirrename( - struct xmemnode *fromparent, /* parent directory of source */ - struct xmemnode *fromxp, /* source xmemnode */ - struct xmemnode *toparent, /* parent directory of target */ - char *nm, /* entry we are trying to change */ - struct xmemnode *to, /* target xmemnode */ - struct xdirent *where, /* target xmemnode directory entry */ - struct cred *cred) /* credentials */ -{ - int error = 0; - int doingdirectory; - timestruc_t now; - -#if defined(lint) - nm = nm; -#endif - ASSERT(RW_WRITE_HELD(&toparent->xn_rwlock)); - - rw_enter(&fromxp->xn_rwlock, RW_READER); - rw_enter(&to->xn_rwlock, RW_READER); - - /* - * Check that everything is on the same filesystem. - */ - if (to->xn_vnode->v_vfsp != toparent->xn_vnode->v_vfsp || - to->xn_vnode->v_vfsp != fromxp->xn_vnode->v_vfsp) { - error = EXDEV; - goto out; - } - - /* - * Short circuit rename of something to itself. - */ - if (fromxp == to) { - error = ESAME; /* special KLUDGE error code */ - goto out; - } - - /* - * Must have write permission to rewrite target entry. - */ - if (error = xmem_xaccess(fromparent, VWRITE, cred)) - goto out; - - /* - * If the parent directory is "sticky", then the user must own - * either the parent directory or the destination of the rename, - * or else must have permission to write the destination. - * Otherwise the destination may not be changed (except by the - * privileged users). This implements append-only directories. - */ - if (error = xmem_sticky_remove_access(toparent, to, cred)) - goto out; - - /* - * Ensure source and target are compatible (both directories - * or both not directories). If target is a directory it must - * be empty and have no links to it; in addition it must not - * be a mount point, and both the source and target must be - * writable. 
- */ - doingdirectory = (fromxp->xn_type == VDIR); - if (to->xn_type == VDIR) { - if (!doingdirectory) { - error = EISDIR; - goto out; - } - /* - * vn_vfswlock will prevent mounts from using the directory - * until we are done. - */ - if (vn_vfswlock(XNTOV(to))) { - error = EBUSY; - goto out; - } - if (vn_mountedvfs(XNTOV(to)) != NULL) { - vn_vfsunlock(XNTOV(to)); - error = EBUSY; - goto out; - } - - mutex_enter(&to->xn_tlock); - if (to->xn_dirents > 2 || to->xn_nlink > 2) { - mutex_exit(&to->xn_tlock); - vn_vfsunlock(XNTOV(to)); - error = EEXIST; /* SIGH should be ENOTEMPTY */ - /* - * Update atime because checking xn_dirents is - * logically equivalent to reading the directory - */ - gethrestime(&to->xn_atime); - goto out; - } - mutex_exit(&to->xn_tlock); - } else if (doingdirectory) { - error = ENOTDIR; - goto out; - } - - where->xd_xmemnode = fromxp; - gethrestime(&now); - toparent->xn_mtime = now; - toparent->xn_ctime = now; - - /* - * Upgrade to write lock on "to" (i.e., the target xmemnode). - */ - rw_exit(&to->xn_rwlock); - rw_enter(&to->xn_rwlock, RW_WRITER); - - /* - * Decrement the link count of the target xmemnode. - */ - DECR_COUNT(&to->xn_nlink, &to->xn_tlock); - to->xn_ctime = now; - - if (doingdirectory) { - /* - * The entry for "to" no longer exists so release the vfslock. - */ - vn_vfsunlock(XNTOV(to)); - - /* - * Decrement the target link count and delete all entires. - */ - xdirtrunc(to); - ASSERT(to->xn_nlink == 0); - - /* - * Renaming a directory with the parent different - * requires that ".." be rewritten. The window is - * still there for ".." to be inconsistent, but this - * is unavoidable, and a lot shorter than when it was - * done in a user process. 
- */ - if (fromparent != toparent) - xdirfixdotdot(fromxp, fromparent, toparent); - } -out: - rw_exit(&to->xn_rwlock); - rw_exit(&fromxp->xn_rwlock); - return (error); -} - -static void -xdirfixdotdot( - struct xmemnode *fromxp, /* child directory */ - struct xmemnode *fromparent, /* old parent directory */ - struct xmemnode *toparent) /* new parent directory */ -{ - struct xdirent *dotdot; - - ASSERT(RW_LOCK_HELD(&toparent->xn_rwlock)); - - /* - * Increment the link count in the new parent xmemnode - */ - INCR_COUNT(&toparent->xn_nlink, &toparent->xn_tlock); - gethrestime(&toparent->xn_ctime); - - dotdot = xmemfs_hash_lookup("..", fromxp, 0, NULL); - - ASSERT(dotdot->xd_xmemnode == fromparent); - dotdot->xd_xmemnode = toparent; - - /* - * Decrement the link count of the old parent xmemnode. - * If fromparent is NULL, then this is a new directory link; - * it has no parent, so we need not do anything. - */ - if (fromparent != NULL) { - mutex_enter(&fromparent->xn_tlock); - if (fromparent->xn_nlink != 0) { - fromparent->xn_nlink--; - gethrestime(&fromparent->xn_ctime); - } - mutex_exit(&fromparent->xn_tlock); - } -} - -static int -xdiraddentry( - struct xmemnode *dir, /* target directory to make entry in */ - struct xmemnode *xp, /* new xmemnode */ - char *name, - enum de_op op, - struct xmemnode *fromxp) -{ - struct xdirent *xdp, *tpdp; - size_t namelen, alloc_size; - timestruc_t now; - - /* - * Make sure the parent directory wasn't removed from - * underneath the caller. - */ - if (dir->xn_dir == NULL) - return (ENOENT); - - /* - * Check that everything is on the same filesystem. 
- */ - if (xp->xn_vnode->v_vfsp != dir->xn_vnode->v_vfsp) - return (EXDEV); - - /* - * Allocate and initialize directory entry - */ - namelen = strlen(name) + 1; - alloc_size = namelen + sizeof (struct xdirent); - xdp = xmem_memalloc(alloc_size, 0); - if (xdp == NULL) - return (ENOSPC); - - if ((op == DE_RENAME) && (xp->xn_type == VDIR)) - xdirfixdotdot(xp, fromxp, dir); - - dir->xn_size += alloc_size; - dir->xn_dirents++; - xdp->xd_xmemnode = xp; - xdp->xd_parent = dir; - - /* - * The directory entry and its name were allocated sequentially. - */ - xdp->xd_name = (char *)xdp + sizeof (struct xdirent); - (void) strcpy(xdp->xd_name, name); - - xmemfs_hash_in(xdp); - - /* - * Some utilities expect the size of a directory to remain - * somewhat static. For example, a routine which unlinks - * files between calls to readdir(); the size of the - * directory changes from underneath it and so the real - * directory offset in bytes is invalid. To circumvent - * this problem, we initialize a directory entry with an - * phony offset, and use this offset to determine end of - * file in xmem_readdir. - */ - tpdp = dir->xn_dir->xd_prev; - /* - * Install at first empty "slot" in directory list. - */ - while (tpdp->xd_next != NULL && (tpdp->xd_next->xd_offset - - tpdp->xd_offset) <= 1) { - ASSERT(tpdp->xd_next != tpdp); - ASSERT(tpdp->xd_prev != tpdp); - ASSERT(tpdp->xd_next->xd_offset > tpdp->xd_offset); - tpdp = tpdp->xd_next; - } - xdp->xd_offset = tpdp->xd_offset + 1; - - /* - * If we're at the end of the dirent list and the offset (which - * is necessarily the largest offset in this directory) is more - * than twice the number of dirents, that means the directory is - * 50% holes. At this point we reset the slot pointer back to - * the beginning of the directory so we start using the holes. 
- * The idea is that if there are N dirents, there must also be - * N holes, so we can satisfy the next N creates by walking at - * most 2N entries; thus the average cost of a create is constant. - * Note that we use the first dirent's xd_prev as the roving - * slot pointer; it's ugly, but it saves a word in every dirent. - */ - if (tpdp->xd_next == NULL && tpdp->xd_offset > 2 * dir->xn_dirents) - dir->xn_dir->xd_prev = dir->xn_dir->xd_next; - else - dir->xn_dir->xd_prev = xdp; - - ASSERT(tpdp->xd_next != tpdp); - ASSERT(tpdp->xd_prev != tpdp); - - xdp->xd_next = tpdp->xd_next; - if (xdp->xd_next) { - xdp->xd_next->xd_prev = xdp; - } - xdp->xd_prev = tpdp; - tpdp->xd_next = xdp; - - ASSERT(xdp->xd_next != xdp); - ASSERT(xdp->xd_prev != xdp); - ASSERT(tpdp->xd_next != tpdp); - ASSERT(tpdp->xd_prev != tpdp); - - gethrestime(&now); - dir->xn_mtime = now; - dir->xn_ctime = now; - - return (0); -} - -static int -xdirmakexnode( - struct xmemnode *dir, - struct xmount *xm, - struct vattr *va, - enum de_op op, - struct xmemnode **newnode, - struct cred *cred) -{ - struct xmemnode *xp; - enum vtype type; - - ASSERT(va != NULL); - ASSERT(op == DE_CREATE || op == DE_MKDIR); - if (((va->va_mask & AT_ATIME) && TIMESPEC_OVERFLOW(&va->va_atime)) || - ((va->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&va->va_mtime))) - return (EOVERFLOW); - type = va->va_type; - xp = xmem_memalloc(sizeof (struct xmemnode), 1); - xp->xn_vnode = vn_alloc(KM_SLEEP); - xmemnode_init(xm, xp, va, cred); - if (type == VBLK || type == VCHR) { - xp->xn_vnode->v_rdev = xp->xn_rdev = va->va_rdev; - } else { - xp->xn_vnode->v_rdev = xp->xn_rdev = NODEV; - } - xp->xn_vnode->v_type = type; - xp->xn_uid = crgetuid(cred); - - /* - * To determine the group-id of the created file: - * 1) If the gid is set in the attribute list (non-Sun & pre-4.0 - * clients are not likely to set the gid), then use it if - * the process is privileged, belongs to the target group, - * or the group is the same as the parent directory. 
- * 2) If the filesystem was not mounted with the Old-BSD-compatible - * GRPID option, and the directory's set-gid bit is clear, - * then use the process's gid. - * 3) Otherwise, set the group-id to the gid of the parent directory. - */ - if ((va->va_mask & AT_GID) && - ((va->va_gid == dir->xn_gid) || groupmember(va->va_gid, cred) || - secpolicy_vnode_create_gid(cred) == 0)) { - xp->xn_gid = va->va_gid; - } else { - if (dir->xn_mode & VSGID) - xp->xn_gid = dir->xn_gid; - else - xp->xn_gid = crgetgid(cred); - } - /* - * If we're creating a directory, and the parent directory has the - * set-GID bit set, set it on the new directory. - * Otherwise, if the user is neither privileged nor a member of the - * file's new group, clear the file's set-GID bit. - */ - if (dir->xn_mode & VSGID && type == VDIR) - xp->xn_mode |= VSGID; - else if ((xp->xn_mode & VSGID) && - secpolicy_vnode_setids_setgids(cred, xp->xn_gid) != 0) - xp->xn_mode &= ~VSGID; - - if (va->va_mask & AT_ATIME) - xp->xn_atime = va->va_atime; - if (va->va_mask & AT_MTIME) - xp->xn_mtime = va->va_mtime; - - if (op == DE_MKDIR) - xdirinit(dir, xp); - - *newnode = xp; - return (0); -}
--- a/usr/src/uts/intel/fs/xmemfs/xmem_subr.c Thu Jan 18 14:25:26 2007 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,566 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/types.h> -#include <sys/errno.h> -#include <sys/param.h> -#include <sys/t_lock.h> -#include <sys/systm.h> -#include <sys/sysmacros.h> -#include <sys/debug.h> -#include <sys/time.h> -#include <sys/cmn_err.h> -#include <sys/vnode.h> -#include <sys/vfs.h> -#include <sys/cred.h> -#include <sys/kmem.h> -#include <sys/stat.h> -#include <sys/mode.h> -#include <vm/hat.h> -#include <vm/seg_map.h> -#include <vm/seg_kmem.h> -#include <vm/pvn.h> -#include <vm/page.h> -#include <sys/atomic.h> -#include <sys/policy.h> -#include <sys/fs/xmem.h> - - -extern void *xpgget(struct xmount *); -extern void xpgput(struct xmount *, void *); - -#define MODESHIFT 3 - -size_t xmemfs_maxkmem = 32768; -size_t xmemfs_kmemcnt; - -int -xmem_xaccess(void *vxp, int mode, struct cred *cred) -{ - struct xmemnode *xp = vxp; - int shift = 0; - /* - * Check access based on owner, group and - * public permissions in xmemnode. - */ - if (crgetuid(cred) != xp->xn_uid) { - shift += MODESHIFT; - if (groupmember(xp->xn_gid, cred) == 0) - shift += MODESHIFT; - } - - mode &= ~(xp->xn_mode << shift); - - if (mode == 0) - return (0); - - return (secpolicy_vnode_access(cred, XNTOV(xp), xp->xn_uid, mode)); -} - -/* - * Decide whether it is okay to remove within a sticky directory. - * Two conditions need to be met: write access to the directory - * is needed. In sticky directories, write access is not sufficient; - * you can remove entries from a directory only if you own the directory, - * if you are privileged, if you own the entry or if they entry is - * a plain file and you have write access to that file. - * Function returns 0 if remove access is granted. 
- */ -int -xmem_sticky_remove_access(struct xmemnode *dir, struct xmemnode *entry, - struct cred *cr) -{ - uid_t uid; - - if ((dir->xn_mode & S_ISVTX) && - (uid = crgetuid(cr)) != dir->xn_uid && - uid != entry->xn_uid && - (entry->xn_type != VREG || - xmem_xaccess(entry, VWRITE, cr) != 0)) - return (secpolicy_vnode_remove(cr)); - return (0); -} - -/* - * Allocate zeroed memory if xmemfs_maxkmem has not been exceeded - * or the 'musthave' flag is set. 'musthave' allocations should - * always be subordinate to normal allocations so that xmemfs_maxkmem - * can't be exceeded by more than a few KB. Example: when creating - * a new directory, the xmemnode is a normal allocation; if that - * succeeds, the dirents for "." and ".." are 'musthave' allocations. - */ -void * -xmem_memalloc(size_t size, int musthave) -{ - void *ptr = NULL; - - if (musthave) { - atomic_add_long(&xmemfs_kmemcnt, size); - ptr = kmem_zalloc(size, KM_SLEEP); - } else if (xmemfs_kmemcnt + size < xmemfs_maxkmem) { - /* - * kmemcnt may have increased since above check so a little - * more than xmemfs_maxkmem may be allocated. 
- */ - ptr = kmem_zalloc(size, KM_NOSLEEP); - if (ptr) - atomic_add_long(&xmemfs_kmemcnt, size); - } - return (ptr); -} - -void -xmem_memfree(void *cp, size_t size) -{ - extern size_t xmemfs_kmemcnt; - - kmem_free(cp, size); - atomic_add_long(&xmemfs_kmemcnt, -size); -} - -/* add to the number of pages we have created */ - -int -xmem_mem_add(struct xmount *xm, size_t size) -{ - mutex_enter(&xm->xm_contents); - - /* allocate the last available block */ - if ((xm->xm_mem + size) > xm->xm_max) { - mutex_exit(&xm->xm_contents); - return (1); - } - xm->xm_mem += size; - mutex_exit(&xm->xm_contents); - return (0); -} - -/* sub to the number of pages we have created */ - -static void -xmem_mem_sub(struct xmount *xm, size_t size) -{ - mutex_enter(&xm->xm_contents); - xm->xm_mem -= size; - mutex_exit(&xm->xm_contents); -} - -/* - * xmem_acquire_pages: returns an array of size btop(xm_bsize) page pointers - * or xm_bsize bytes. - * - * If large page, the array will contain 1024 entries (4MB) or 512 entries. - * - * If not large page, there is no array as a page_t * is returned. 
- */ - -static page_t ** -xmem_acquire_pages(struct xmount *xm, struct vnode *vp, offset_t off) -{ - page_t **ppa, *pp, *pplist; - uint_t pindex; - size_t bsize; - struct seg tmpseg; - - bsize = xm->xm_bsize; - - if (xmem_mem_add(xm, 1)) - return (NULL); - - if (xm->xm_flags & XARGS_RESERVEMEM) { - - mutex_enter(&xm->xm_contents); - ppa = xpgget(xm); - mutex_exit(&xm->xm_contents); - - if (xm->xm_ppb == 1) { - /* ppa is a direct page pointer */ - - if (!page_hashin((page_t *)ppa, vp, off, NULL)) { - panic("xmem_acquire_pages: hashin failed" - " %p %llx", (void *)vp, off); - } - pindex = xm->xm_ppb; /* bypass for loop */ - } else { - pindex = 0; - } - - for (; pindex < xm->xm_ppb; pindex++, off += PAGESIZE) { - pp = ppa[pindex]; - if (!page_hashin(pp, vp, off, NULL)) { - panic("xmem_acquire_pages: hashin failed" - " %p %p %llx", (void *)pp, (void *)vp, off); - } - } - return (ppa); - } - bzero(&tmpseg, sizeof (struct seg)); - tmpseg.s_as = &kas; - - if ((freemem - xm->xm_ppb) < xmemfs_minfree || - page_resv(xm->xm_ppb, KM_NOSLEEP) == 0) { - - cmn_err(CE_WARN, "%s: File system full, no memory", - xm->xm_mntpath); - return (NULL); - } - - (void) page_create_wait(xm->xm_ppb, PG_WAIT); - - pplist = page_get_freelist(vp, off, &tmpseg, - (caddr_t)(uintptr_t)off, bsize, 0, NULL); - if (pplist == NULL && xm->xm_ppb == 1) { - pplist = page_get_cachelist(vp, off, &tmpseg, - (caddr_t)(uintptr_t)off, 0, NULL); - } - if (pplist == NULL) { - page_create_putback(xm->xm_ppb); - page_unresv(xm->xm_ppb); - return (NULL); - } - if (PP_ISAGED(pplist) == 0) { - ASSERT(xm->xm_ppb == 1); - page_hashout(pplist, NULL); - } - - if (xm->xm_ppb > 1) - ppa = kmem_alloc(sizeof (*ppa) * xm->xm_ppb, KM_SLEEP); - - for (pindex = 0; pindex < xm->xm_ppb; pindex++, off += PAGESIZE) { - pp = pplist; - page_sub(&pplist, pp); - ASSERT(PAGE_EXCL(pp)); - ASSERT(pp->p_vnode == NULL); - ASSERT(!hat_page_is_mapped(pp)); - PP_CLRFREE(pp); - PP_CLRAGED(pp); - - if (xm->xm_ppb == 1) - ppa = (page_t **)pp; - else 
- ppa[pindex] = pp; - - if (!page_hashin(pp, vp, off, NULL)) { - panic("xmem_acquire_pages: hashin failed" - " %p %p %llx", (void *)pp, (void *)vp, off); - } - page_downgrade(pp); /* XXX */ - } - return (ppa); -} - -static void -xmem_release_pages(struct xmount *xm, page_t **ppa) -{ - uint_t pindex; - page_t *pp; - - xmem_mem_sub(xm, 1); - - if (xm->xm_flags & XARGS_RESERVEMEM) { - - /* - * if ppb == 1 and to lessen the load on kmem memory in - * having to allocate a million 4 byte pointers for a - * 4 GB file system, ppa is actually a page_t * - */ - - if (xm->xm_ppb == 1) { - page_hashout((page_t *)ppa, NULL); - pindex = xm->xm_ppb; /* bypass for loop */ - } else - pindex = 0; - - for (; pindex < xm->xm_ppb; pindex++) { - pp = ppa[pindex]; - page_hashout(pp, NULL); - } - mutex_enter(&xm->xm_contents); - xpgput(xm, ppa); - mutex_exit(&xm->xm_contents); - - } else { - int flag = B_INVAL; - - if (xm->xm_ppb == 1) { - VN_DISPOSE((page_t *)ppa, flag, 0, kcred); - } else { - - for (pindex = 0; pindex < xm->xm_ppb; pindex++) - VN_DISPOSE(ppa[pindex], flag, 0, kcred); - - kmem_free(ppa, sizeof (*ppa) * xm->xm_ppb); - } - page_unresv(xm->xm_ppb); - } -} - -/* - * Initialize a xmemnode and add it to file list under mount point. 
- */ -void -xmemnode_init(struct xmount *xm, struct xmemnode *xp, - vattr_t *vap, cred_t *cred) -{ - struct vnode *vp; - timestruc_t now; - - ASSERT(vap != NULL); - ASSERT(cred != NULL); - - rw_init(&xp->xn_rwlock, NULL, RW_DEFAULT, NULL); - mutex_init(&xp->xn_tlock, NULL, MUTEX_DEFAULT, NULL); - xp->xn_mode = MAKEIMODE(vap->va_type, vap->va_mode); - - if (S_ISREG(xp->xn_mode)) - xp->xn_mode &= ~(S_IXUSR | S_IXGRP | S_IXOTH); - - xp->xn_mask = 0; - xp->xn_type = vap->va_type; - xp->xn_nodeid = (ino64_t)(uint32_t)((uintptr_t)xp >> 3); - xp->xn_nlink = 1; - xp->xn_size = 0; - xp->xn_uid = crgetuid(cred); - xp->xn_gid = crgetgid(cred); - - xp->xn_fsid = xm->xm_dev; - xp->xn_rdev = vap->va_rdev; - xp->xn_blksize = PAGESIZE; - xp->xn_nblocks = 0; - gethrestime(&now); - xp->xn_atime = now; - xp->xn_mtime = now; - xp->xn_ctime = now; - xp->xn_seq = 0; - xp->xn_dir = NULL; - - vp = XNTOV(xp); - vn_reinit(vp); - vn_setops(vp, xmem_vnodeops); - vp->v_vfsp = xm->xm_vfsp; - vp->v_type = vap->va_type; - vp->v_rdev = vap->va_rdev; - vp->v_data = (caddr_t)xp; - - mutex_enter(&xm->xm_contents); - /* - * Increment the pseudo generation number for this xmemnode. - * Since xmemnodes are allocated and freed, there really is no - * particular generation number for a new xmemnode. Just fake it - * by using a counter in each file system. - */ - xp->xn_gen = xm->xm_gen++; - - /* - * Add new xmemnode to end of linked list of xmemnodes for this xmemfs - * Root directory is handled specially in xmem_mount. 
- */ - if (xm->xm_rootnode != (struct xmemnode *)NULL) { - xp->xn_forw = NULL; - xp->xn_back = xm->xm_rootnode->xn_back; - xp->xn_back->xn_forw = xm->xm_rootnode->xn_back = xp; - } - mutex_exit(&xm->xm_contents); -} - -/* - * - */ -int -xmem_fillpages(struct xmemnode *xp, struct vnode *vp, offset_t off, - offset_t len, int zerofill) -{ - uint_t blockno, endblock; - caddr_t base; - int error = 0; - struct xmount *xm = (struct xmount *)VTOXM(vp); - offset_t poff; - size_t bsize = xm->xm_bsize; - - blockno = off >> xm->xm_bshift; - poff = (offset_t)blockno << xm->xm_bshift; - endblock = howmany(off + len, (offset_t)bsize); - - if (endblock > xp->xn_ppasz) - return (EINVAL); - - /* Create missing pages if any */ - for (; blockno < endblock; ) { - if (!xp->xn_ppa[blockno]) { - xp->xn_ppa[blockno] = xmem_acquire_pages(xm, vp, poff); - if (!xp->xn_ppa[blockno]) - return (ENOSPC); - if (zerofill) { - page_t **ppp; - if (xm->xm_ppb == 1) - ppp = (page_t **)&xp->xn_ppa[blockno]; - else - ppp = xp->xn_ppa[blockno]; - - base = segxmem_getmap(xm->xm_map, vp, poff, - bsize, ppp, S_WRITE); - (void) kzero(base, bsize); - segxmem_release(xm->xm_map, base, bsize); - } - xp->xn_nblocks++; - } - blockno++; - poff += bsize; - } - return (error); -} - -/* - * xmemnode_trunc - set length of xmemnode and deal with resources - */ -int -xmemnode_trunc(struct xmount *xm, struct xmemnode *xp, u_offset_t newsize) -{ - u_offset_t oldsize = xp->xn_size; - timestruc_t now; - int error = 0; - size_t zlen; - ulong_t newblocks, oldblocks; - - ASSERT(RW_WRITE_HELD(&xp->xn_rwlock)); - ASSERT(RW_WRITE_HELD(&xp->xn_contents)); - - if (newsize == oldsize) { - /* Required by POSIX */ - goto stamp_out; - } - - switch (xp->xn_type) { - case VREG: - - oldblocks = howmany(oldsize, xm->xm_bsize); - newblocks = howmany(newsize, xm->xm_bsize); - - XMEMPRINTF(4, ("xmemnode_trunc: xp %p old %lx new %lx\n", - xp, oldblocks, newblocks)); - /* - * xn_ppasz is the size of the ppa array which may not - * be fully 
populated if pages cannot be allocated. - */ - ASSERT(xp->xn_ppasz >= oldblocks); - - /* Growing the file */ - if (newblocks > oldblocks) { - if (xp->xn_ppasz < newblocks) { - page_t ***ppa; - ppa = kmem_zalloc(newblocks * sizeof (*ppa), KM_SLEEP); - if (xp->xn_ppasz) { - bcopy(xp->xn_ppa, ppa, - newblocks * sizeof (*ppa)); - - kmem_free(xp->xn_ppa, - xp->xn_ppasz * sizeof (*ppa)); - } - xp->xn_ppa = ppa; - xp->xn_ppasz = newblocks; - } - } - - /* Free pages if shrinking file over block boundary. */ - if (newblocks < oldblocks) { - uint_t next; - page_t ***ppa = NULL; - next = newblocks; - if (next) { - ppa = kmem_zalloc(next * sizeof (*ppa), - KM_SLEEP); - bcopy(xp->xn_ppa, ppa, next * sizeof (*ppa)); - } - for (; next < oldblocks; next++) { - if (!xp->xn_ppa[next]) - continue; - xmem_release_pages(xm, xp->xn_ppa[next]); - xp->xn_nblocks--; - } - kmem_free(xp->xn_ppa, xp->xn_ppasz * sizeof (*ppa)); - xp->xn_ppa = ppa; - xp->xn_ppasz = newblocks; - } - - /* - * Update the file size now to reflect the pages we just - * blew away as we're about to drop the - * contents lock to zero the partial page (which could - * re-enter xmemfs via getpage and try to reacquire the lock) - * Once we drop the lock, faulters can fill in holes in - * the file and if we haven't updated the size they - * may fill in holes that are beyond EOF, which will then - * never get cleared. - */ - xp->xn_size = newsize; - - - if (newsize) { - /* Zero new size of file to page boundary. */ - zlen = PAGESIZE - ((ulong_t)newsize & PAGEOFFSET); - rw_exit(&xp->xn_contents); - pvn_vpzero(XNTOV(xp), (u_offset_t)newsize, zlen); - rw_enter(&xp->xn_contents, RW_WRITER); - } - - break; - - case VLNK: - /* - * Don't do anything here - * xmem_inactive frees the memory - */ - if (newsize != 0) - error = EINVAL; - goto out; - case VDIR: - /* - * Remove all the directory entries under this directory. 
- */ - if (newsize != 0) { - error = EINVAL; - goto out; - } - xdirtrunc(xp); - ASSERT(xp->xn_nlink == 0); - break; - default: - goto out; - } - -stamp_out: - gethrestime(&now); - xp->xn_mtime = now; - xp->xn_ctime = now; -out: - /* - * xmemnode_trunc() cannot fail when newsize == 0. - */ - ASSERT(error == 0 || newsize != 0); - return (error); -}
--- a/usr/src/uts/intel/fs/xmemfs/xmem_vfsops.c Thu Jan 18 14:25:26 2007 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,810 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/sysmacros.h> -#include <sys/kmem.h> -#include <sys/time.h> -#include <sys/pathname.h> -#include <sys/vfs.h> -#include <sys/vnode.h> -#include <sys/stat.h> -#include <sys/uio.h> -#include <sys/stat.h> -#include <sys/errno.h> -#include <sys/cmn_err.h> -#include <sys/cred.h> -#include <sys/statvfs.h> -#include <sys/mount.h> -#include <sys/mntent.h> -#include <sys/debug.h> -#include <sys/systm.h> -#include <sys/vmsystm.h> -#include <sys/bitmap.h> -#include <fs/fs_subr.h> -#include <vm/page.h> -#include <sys/model.h> -#include <sys/map.h> -#include <vm/seg_kmem.h> -#include <sys/cpuvar.h> -#include <sys/policy.h> - -#include <sys/fs/swapnode.h> -#include <sys/fs/xmem.h> - -#ifndef min -#define min(a, b) ((a) < (b) ? (a) : (b)) -#endif - -/* - * xmemfs vfs operations. 
- */ -static int xmemfsinit(int, char *); -static int xmem_mount(struct vfs *, struct vnode *, - struct mounta *, struct cred *); -static int xmem_unmount(struct vfs *, int, struct cred *); -static int xmem_root(struct vfs *, struct vnode **); -static int xmem_statvfs(struct vfs *, struct statvfs64 *); -static int xmem_vget(struct vfs *, struct vnode **, struct fid *); - -/* - * Loadable module wrapper - */ -#include <sys/modctl.h> - -static vfsdef_t vfw = { - VFSDEF_VERSION, - "xmemfs", - xmemfsinit, - 0, - NULL -}; - -/* - * Module linkage information - */ -static struct modlfs modlfs = { - &mod_fsops, "filesystem for xmemfs", &vfw -}; - -static struct modlinkage modlinkage = { - MODREV_1, &modlfs, NULL -}; - -pgcnt_t xmemfs_minfree; - -int -_init() -{ - return (mod_install(&modlinkage)); -} - -int -_fini() -{ - return (mod_remove(&modlinkage)); -} - -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&modlinkage, modinfop)); -} - -static int xmemfsfstype; -static major_t xmemfs_major; -static minor_t xmemfs_minor; -static kmutex_t xmemfs_minor_lock; - - -/* - * initialize global xmemfs locks and such - * called when loading xmemfs module - */ -static int -xmemfsinit(int fstype, char *name) -{ - static const fs_operation_def_t xmem_vfsops[] = { - VFSNAME_MOUNT, xmem_mount, - VFSNAME_UNMOUNT, xmem_unmount, - VFSNAME_ROOT, xmem_root, - VFSNAME_STATVFS, xmem_statvfs, - VFSNAME_VGET, xmem_vget, - NULL, NULL - }; - int error; - extern void xmemfs_hash_init(); - - error = vfs_setfsops(fstype, xmem_vfsops, NULL); - if (error != 0) { - cmn_err(CE_WARN, "xmemfsinit: bad vfs ops template"); - return (error); - } - - error = vn_make_ops(name, xmem_vnodeops_template, &xmem_vnodeops); - if (error != 0) { - (void) vfs_freevfsops_by_type(fstype); - cmn_err(CE_WARN, "xmemfsinit: bad vnode ops template"); - return (error); - } - - xmemfs_hash_init(); - xmemfsfstype = fstype; - ASSERT(xmemfsfstype != 0); - - if ((xmemfs_major = getudev()) == (major_t)-1) { - 
cmn_err(CE_WARN, "xmemfsinit: Can't get unique device number."); - xmemfs_major = 0; - } - mutex_init(&xmemfs_minor_lock, NULL, MUTEX_DEFAULT, NULL); - - return (0); -} - - -/* - * xpg is an array of page_t * if xm_ppb > 1. - * xpg is a page_t * if xm_ppb == 1 - */ -void -xpgput(struct xmount *xm, void *xpg) -{ - ASSERT(xm->xm_xpgcnt < xm->xm_max); - xm->xm_xpgarray[xm->xm_xpgcnt++] = xpg; -} - -void * -xpgget(struct xmount *xm) -{ - if (!xm->xm_xpgcnt) - return (NULL); - - return (xm->xm_xpgarray[--xm->xm_xpgcnt]); -} - -void -xpginit(struct xmount *xm) -{ - xm->xm_xpgcnt = 0; - xm->xm_xpgarray = kmem_zalloc(sizeof (void *) * xm->xm_max, KM_SLEEP); -} - -void -xpgtrunc(struct xmount *xm, size_t newsz) -{ - void *old = xm->xm_xpgarray; - - ASSERT(newsz == xm->xm_xpgcnt); - if (newsz) { - xm->xm_xpgarray = - kmem_alloc(sizeof (void *) * newsz, KM_SLEEP); - bcopy(old, xm->xm_xpgarray, sizeof (void *) * newsz); - } - kmem_free(old, sizeof (void *) * xm->xm_max); -} - -void -xpgdeinit(struct xmount *xm) -{ - xm->xm_xpgcnt = 0; - if (xm->xm_max) - kmem_free(xm->xm_xpgarray, sizeof (void *) * xm->xm_max); - xm->xm_xpgarray = NULL; -} - - -struct xmount *xmountp; /* ### DEBUG */ - -#define XFREE(xm, xp) \ - vn_free(xp->xn_vnode); \ - xmem_memfree(xp, sizeof (struct xmemnode)); \ - rmfreemap(xm->xm_map); \ - xmem_memfree(xm->xm_mntpath, strlen(xm->xm_mntpath) + 1); \ - xpgdeinit(xm); \ - xmem_memfree(xm, sizeof (struct xmount)); - - -static int -xmem_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap, - struct cred *cr) -{ - struct xmount *xm; - struct xmemnode *xp; - struct pathname dpn; - char *data = uap->dataptr; - int datalen = uap->datalen; - int error; - struct xmemfs_args xargs; - struct vattr rattr; - int got_attrs, num_pagesizes; - uint_t blocks_left; - size_t frag; - - XMEMPRINTF(1, ("xmem_mount: vfs %p mvp %p uap %p cr %p\n", - (void *)vfsp, (void *)mvp, (void *)uap, (void *)cr)); - - if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0) - return 
(error); - - if (mvp->v_type != VDIR) - return (ENOTDIR); - - /* - * Force non-executable files by setting the "noexec" option - * which will be interpreted by the VFS layer. - */ - vfs_setmntopt(vfsp, MNTOPT_NOEXEC, NULL, 0); - - mutex_enter(&mvp->v_lock); - if ((uap->flags & MS_OVERLAY) == 0 && - (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { - mutex_exit(&mvp->v_lock); - return (EBUSY); - } - mutex_exit(&mvp->v_lock); - - /* - * Get arguments - */ - if (datalen != 0) { - if (datalen != sizeof (xargs)) - return (EINVAL); - else { - if (copyin(data, &xargs, sizeof (xargs))) - return (EFAULT); - } - if (xargs.xa_bsize == 0) - xargs.xa_bsize = PAGESIZE; - } else { - xargs.xa_bsize = PAGESIZE; - xargs.xa_flags = 0; - xargs.xa_fssize = 0; - } - - XMEMPRINTF(1, ("xmem_mount: xa bsize %llx fssize %llx flags %x\n", - xargs.xa_bsize, xargs.xa_fssize, xargs.xa_flags)); - - num_pagesizes = page_num_pagesizes(); - - if (xargs.xa_flags & XARGS_LARGEPAGES) - xargs.xa_bsize = page_get_pagesize(num_pagesizes - 1); - - /* Make sure xa_bsize is a pure power of two */ - if (!IS_P2ALIGNED(xargs.xa_bsize, xargs.xa_bsize - 1)) { - cmn_err(CE_WARN, "xmemfs: invalid blocksize %x", - (int)xargs.xa_bsize); - xargs.xa_bsize = PAGESIZE; - } - - while (--num_pagesizes >= 0) - if (xargs.xa_bsize == page_get_pagesize(num_pagesizes)) - break; - - if (num_pagesizes < 0) { - cmn_err(CE_WARN, - "xmemfs: blocksize %lld not a natural pagesize", - xargs.xa_bsize); - xargs.xa_bsize = PAGESIZE; - } - - if (error = pn_get(uap->dir, UIO_USERSPACE, &dpn)) - return (error); - - xm = xmem_memalloc(sizeof (struct xmount), 1); - - xmountp = xm; - - XMEMPRINTF(4, ("xmem_mount: xm %p\n", (void *)xm)); - - xm->xm_mntpath = xmem_memalloc(dpn.pn_pathlen + 1, 1); - (void) strcpy(xm->xm_mntpath, dpn.pn_path); - pn_free(&dpn); - - xm->xm_vmmapsize = xm->xm_mapsize = - xargs.xa_bsize * SEGXMEM_NUM_SIMULMAPS; - - /* need to allocate more to ensure alignment if largepage */ - - if (xargs.xa_bsize != PAGESIZE) - 
xm->xm_vmmapsize += xargs.xa_bsize; - - /* Set block size & max memory allowed for the file system */ - xm->xm_bsize = (size_t)xargs.xa_bsize; - xm->xm_bshift = highbit(xargs.xa_bsize) - 1; - - /* - * 5 * lotsfree satisfies XMEMMINFREE for 4 GB of memory and above. - */ - xmemfs_minfree = min(5 * lotsfree, XMEMMINFREE/PAGESIZE); - - if (xargs.xa_fssize) { - - pgcnt_t fspgcnt; - - xargs.xa_fssize = roundup(xargs.xa_fssize, xm->xm_bsize); - - fspgcnt = xargs.xa_fssize >> PAGESHIFT; - - /* sanity check this against freemem */ - if (fspgcnt + xmemfs_minfree > freemem) { - xmem_memfree(xm->xm_mntpath, - strlen(xm->xm_mntpath) + 1); - xmem_memfree(xm, sizeof (struct xmount)); - return (EFBIG); - } - xm->xm_max = xargs.xa_fssize >> xm->xm_bshift; - } else { - /* - * fssize is mandatory - should not be here but if - * fssize == 0 is allowed, grab all of free memory - * minus xmemfs_minfree. - */ - - if (freemem < xmemfs_minfree) - xm->xm_max = 0; - else - xm->xm_max = freemem - xmemfs_minfree; - - xm->xm_max >>= xm->xm_bshift - PAGESHIFT; - } - - xm->xm_ppb = btop(xm->xm_bsize); /* pages per block */ - - - XMEMPRINTF(1, ("xmem_mount: xm_max %lx xm_bsize %lx\n", - xm->xm_max, xm->xm_bsize)); - - /* - * Allocate a map to provide an address for each page in - * (xargs.xa_bsize * 4) and free all of them. 
- */ - xm->xm_map = rmallocmap_wait(xm->xm_mapsize / PAGESIZE); - - xpginit(xm); - - xp = xmem_memalloc(sizeof (struct xmemnode), 1); - xp->xn_vnode = vn_alloc(KM_SLEEP); - - /* - * do not SLEEP waiting for memory resources after vmem_alloc - */ - - xm->xm_vmmapaddr = xm->xm_mapaddr = - vmem_alloc(heap_arena, xm->xm_vmmapsize, VM_NOSLEEP); - - if (!xm->xm_mapaddr) { - XFREE(xm, xp); - return (ENOMEM); - } - - if ((frag = ((uintptr_t)xm->xm_mapaddr & - ((uintptr_t)xargs.xa_bsize - 1))) != 0) - xm->xm_mapaddr += (xargs.xa_bsize - frag); - - rmfree(xm->xm_map, xm->xm_mapsize, (ulong_t)xm->xm_mapaddr); - - if (xargs.xa_flags & XARGS_RESERVEMEM) { - struct seg tmpseg; - - /* grab all memory now */ - blocks_left = xm->xm_max; - bzero(&tmpseg, sizeof (struct seg)); - tmpseg.s_as = &kas; - - if (page_resv(xm->xm_max * xm->xm_ppb, KM_NOSLEEP) == 0) { - vmem_free(heap_arena, xm->xm_vmmapaddr, - xm->xm_vmmapsize); - XFREE(xm, xp); - return (ENOMEM); - } - - while (blocks_left) { - page_t *pp, *pplist; - page_t **ppa; - int i; - - /* - * optimise for ppb == 1 - let xp_ppa point directly - * to page. 
- */ - - if (xm->xm_ppb > 1) { - ppa = kmem_alloc(sizeof (page_t *) * xm->xm_ppb, - KM_NOSLEEP); - - if (!ppa) { - xpgtrunc(xm, xm->xm_max - blocks_left); - xm->xm_max -= blocks_left; - page_unresv(blocks_left * xm->xm_ppb); - if (xargs.xa_fssize) - cmn_err(CE_WARN, - "could only reserve %d blocks " - "for xmemfs", (int)xm->xm_max); - break; - } - } - - (void) page_create_wait(xm->xm_ppb, PG_WAIT); - pplist = page_get_freelist(NULL, 0, &tmpseg, NULL, - xm->xm_bsize, 0, NULL); - - if (pplist == NULL && xm->xm_ppb == 1) { - pplist = page_get_cachelist(NULL, 0, &tmpseg, - NULL, 0, NULL); - } - - if (pplist == NULL) { - page_create_putback(xm->xm_ppb); - if (xm->xm_ppb > 1) - kmem_free(ppa, sizeof (page_t *) * - xm->xm_ppb); - xpgtrunc(xm, xm->xm_max - blocks_left); - xm->xm_max -= blocks_left; - page_unresv(blocks_left * xm->xm_ppb); - if (xargs.xa_fssize) - cmn_err(CE_WARN, - "could only reserve %d blocks " - "for xmemfs", (int)xm->xm_max); - break; - } - - if (PP_ISAGED(pplist) == 0) { - ASSERT(xm->xm_ppb == 1); - page_hashout(pplist, NULL); - } - - for (i = 0; i < xm->xm_ppb; i++) { - pp = pplist; - page_sub(&pplist, pp); - ASSERT(PAGE_EXCL(pp)); - ASSERT(pp->p_vnode == NULL); - ASSERT(!hat_page_is_mapped(pp)); - PP_CLRFREE(pp); - PP_CLRAGED(pp); - if (xm->xm_ppb == 1) - ppa = (page_t **)pp; - else - ppa[i] = pp; - } - - xpgput(xm, ppa); - blocks_left--; - } - if (!xm->xm_xpgcnt) { - /* No pages at all */ - page_unresv(xm->xm_max * xm->xm_ppb); - vmem_free(heap_arena, xm->xm_vmmapaddr, - xm->xm_vmmapsize); - XFREE(xm, xp); - return (ENOMEM); - } - xm->xm_flags |= XARGS_RESERVEMEM; - } - xm->xm_bsize = (size_t)xargs.xa_bsize; - - /* - * find an available minor device number for this mount - */ - mutex_enter(&xmemfs_minor_lock); - do { - xmemfs_minor = (xmemfs_minor + 1) & L_MAXMIN32; - xm->xm_dev = makedevice(xmemfs_major, xmemfs_minor); - } while (vfs_devismounted(xm->xm_dev)); - mutex_exit(&xmemfs_minor_lock); - - /* - * Set but don't bother entering the mutex - * 
(xmount not on mount list yet) - */ - mutex_init(&xm->xm_contents, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&xm->xm_renamelck, NULL, MUTEX_DEFAULT, NULL); - - xm->xm_vfsp = vfsp; - - vfsp->vfs_data = (caddr_t)xm; - vfsp->vfs_fstype = xmemfsfstype; - vfsp->vfs_dev = xm->xm_dev; - vfsp->vfs_bsize = xm->xm_bsize; - vfsp->vfs_flag |= VFS_NOTRUNC; - vfs_make_fsid(&vfsp->vfs_fsid, xm->xm_dev, xmemfsfstype); - - /* - * allocate and initialize root xmemnode structure - */ - bzero(&rattr, sizeof (struct vattr)); - rattr.va_mode = (mode_t)(S_IFDIR | 0777); - rattr.va_type = VDIR; - rattr.va_rdev = 0; - xmemnode_init(xm, xp, &rattr, cr); - - /* - * Get the mode, uid, and gid from the underlying mount point. - */ - rattr.va_mask = AT_MODE|AT_UID|AT_GID; /* Hint to getattr */ - got_attrs = VOP_GETATTR(mvp, &rattr, 0, cr); - - rw_enter(&xp->xn_rwlock, RW_WRITER); - XNTOV(xp)->v_flag |= VROOT; - - /* - * If the getattr succeeded, use its results. Otherwise allow - * the previously set hardwired defaults to prevail. - */ - if (got_attrs == 0) { - xp->xn_mode = rattr.va_mode; - xp->xn_uid = rattr.va_uid; - xp->xn_gid = rattr.va_gid; - } - - /* - * initialize linked list of xmemnodes so that the back pointer of - * the root xmemnode always points to the last one on the list - * and the forward pointer of the last node is null. - */ - xp->xn_back = xp; - xp->xn_forw = NULL; - xp->xn_nlink = 0; - xm->xm_rootnode = xp; - - xdirinit(xp, xp); - - rw_exit(&xp->xn_rwlock); - - return (0); -} - -static int -xmem_unmount(struct vfs *vfsp, int flag, struct cred *cr) -{ - struct xmount *xm = (struct xmount *)VFSTOXM(vfsp); - struct xmemnode *xp; - - if (secpolicy_fs_unmount(cr, vfsp) != 0) - return (EPERM); - /* - * forced unmount is not supported by this file system - * and thus, ENOTSUP, is being returned. - */ - if (flag & MS_FORCE) - return (ENOTSUP); - - mutex_enter(&xm->xm_contents); - - /* - * Don't close down the xmemfs if there are open files. 
- * There should be only one file referenced (the rootnode) - * and only one reference to the vnode for that file. - */ - xp = xm->xm_rootnode; - if (XNTOV(xp)->v_count > 1) { - mutex_exit(&xm->xm_contents); - return (EBUSY); - } - - for (xp = xp->xn_forw; xp; xp = xp->xn_forw) { - if (XNTOV(xp)->v_count > 0) { - mutex_exit(&xm->xm_contents); - return (EBUSY); - } - } - - /* - * We can drop the mutex now because no one can find this mount - */ - mutex_exit(&xm->xm_contents); - - /* - * Free all kmemalloc'd and non-anonalloc'd memory associated with - * this filesystem. To do this, we go through the file list twice, - * once to remove all the directory entries, and then to remove - * all the files. We do this because there is useful code in - * xmemnode_free which assumes that the directory entry has been - * removed before the file. - */ - /* - * Remove all directory entries - */ - for (xp = xm->xm_rootnode; xp; xp = xp->xn_forw) { - rw_enter(&xp->xn_rwlock, RW_WRITER); - if (xp->xn_type == VDIR) - xdirtrunc(xp); - rw_exit(&xp->xn_rwlock); - } - - ASSERT(xm->xm_rootnode); - - /* - * We re-acquire the lock to prevent others who have a HOLD on - * a xmemnode via its pages from blowing it away - * (in xmem_inactive) while we're trying to get to it here. Once - * we have a HOLD on it we know it'll stick around. - */ - mutex_enter(&xm->xm_contents); - /* - * Remove all the files (except the rootnode) backwards. - */ - while ((xp = xm->xm_rootnode->xn_back) != xm->xm_rootnode) { - /* - * Blow the xmemnode away by HOLDing it and RELE'ing it. - * The RELE calls inactive and blows it away because there - * we have the last HOLD. - */ - VN_HOLD(XNTOV(xp)); - mutex_exit(&xm->xm_contents); - VN_RELE(XNTOV(xp)); - mutex_enter(&xm->xm_contents); - /* - * It's still there after the RELE. Someone else like pageout - * has a hold on it so wait a bit and then try again - we know - * they'll give it up soon. 
- */ - if (xp == xm->xm_rootnode->xn_back) { - mutex_exit(&xm->xm_contents); - delay(hz / 4); - mutex_enter(&xm->xm_contents); - } - } - if (xm->xm_flags & XARGS_RESERVEMEM) { - page_t **ppa; - uint_t pindex; - - while ((ppa = xpgget(xm)) != NULL) { - if (xm->xm_ppb == 1) { - /*LINTED*/ - VN_DISPOSE((page_t *)ppa, B_FREE, 0, kcred); - continue; - } - /* free each page */ - for (pindex = 0; pindex < xm->xm_ppb; pindex++) { - ASSERT(ppa[pindex]->p_szc); - ppa[pindex]->p_szc = 0; - /*LINTED*/ - VN_DISPOSE(ppa[pindex], B_FREE, 0, kcred); - } - kmem_free(ppa, sizeof (*ppa) * xm->xm_ppb); - } - xpgdeinit(xm); - page_unresv(xm->xm_max * xm->xm_ppb); - } - mutex_exit(&xm->xm_contents); - - VN_RELE(XNTOV(xm->xm_rootnode)); - - ASSERT(xm->xm_mntpath); - - xmem_memfree(xm->xm_mntpath, strlen(xm->xm_mntpath) + 1); - - mutex_destroy(&xm->xm_contents); - mutex_destroy(&xm->xm_renamelck); - vmem_free(heap_arena, xm->xm_vmmapaddr, xm->xm_vmmapsize); - rmfreemap(xm->xm_map); - xmem_memfree(xm, sizeof (struct xmount)); - - return (0); -} - -/* - * return root xmemnode for given vnode - */ -static int -xmem_root(struct vfs *vfsp, struct vnode **vpp) -{ - struct xmount *xm = (struct xmount *)VFSTOXM(vfsp); - struct xmemnode *xp = xm->xm_rootnode; - struct vnode *vp; - - ASSERT(xp); - - vp = XNTOV(xp); - VN_HOLD(vp); - *vpp = vp; - return (0); -} - -static int -xmem_statvfs(struct vfs *vfsp, struct statvfs64 *sbp) -{ - struct xmount *xm = (struct xmount *)VFSTOXM(vfsp); - long blocks; - dev32_t d32; - - sbp->f_bsize = xm->xm_bsize; - sbp->f_frsize = xm->xm_bsize; /* No fragmentation for now ? 
*/ - - /* - * Find the amount of available physical and memory swap - */ - if (xm->xm_flags & XARGS_RESERVEMEM) - blocks = xm->xm_max - xm->xm_mem; - else - blocks = MAX((long)(freemem - lotsfree - xmemfs_minfree), 0); - - sbp->f_bavail = sbp->f_bfree = (fsblkcnt64_t)blocks; - - /* - * Total number of blocks is what's available plus what's been used - */ - sbp->f_blocks = (fsblkcnt64_t)(sbp->f_bfree + xm->xm_mem); - - /* - * return a somewhat arbitrary number of inodes available - */ - sbp->f_favail = sbp->f_ffree = (fsfilcnt64_t)((xm->xm_max/1024)+1); - (void) cmpldev(&d32, vfsp->vfs_dev); - sbp->f_fsid = d32; - (void) strcpy(sbp->f_basetype, vfssw[xmemfsfstype].vsw_name); - (void) strcpy(sbp->f_fstr, xm->xm_mntpath); - sbp->f_flag = vf_to_stf(vfsp->vfs_flag); - sbp->f_namemax = MAXNAMELEN - 1; - return (0); -} - -static int -xmem_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp) -{ - register struct xfid *xfid; - register struct xmount *xm = (struct xmount *)VFSTOXM(vfsp); - register struct xmemnode *xp = NULL; - - xfid = (struct xfid *)fidp; - *vpp = NULL; - - mutex_enter(&xm->xm_contents); - for (xp = xm->xm_rootnode; xp; xp = xp->xn_forw) { - mutex_enter(&xp->xn_tlock); - if (xp->xn_nodeid == xfid->xfid_ino) { - /* - * If the gen numbers don't match we know the - * file won't be found since only one xmemnode - * can have this number at a time. - */ - if (xp->xn_gen != xfid->xfid_gen || xp->xn_nlink == 0) { - mutex_exit(&xp->xn_tlock); - mutex_exit(&xm->xm_contents); - return (0); - } - *vpp = (struct vnode *)XNTOV(xp); - - VN_HOLD(*vpp); - - if ((xp->xn_mode & S_ISVTX) && - !(xp->xn_mode & (S_IXUSR | S_IFDIR))) { - mutex_enter(&(*vpp)->v_lock); - (*vpp)->v_flag |= VISSWAP; - mutex_exit(&(*vpp)->v_lock); - } - mutex_exit(&xp->xn_tlock); - mutex_exit(&xm->xm_contents); - return (0); - } - mutex_exit(&xp->xn_tlock); - } - mutex_exit(&xm->xm_contents); - return (0); -}
--- a/usr/src/uts/intel/fs/xmemfs/xmem_vnops.c Thu Jan 18 14:25:26 2007 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1736 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/t_lock.h> -#include <sys/systm.h> -#include <sys/sysmacros.h> -#include <sys/user.h> -#include <sys/time.h> -#include <sys/vfs.h> -#include <sys/vnode.h> -#include <sys/file.h> -#include <sys/fcntl.h> -#include <sys/flock.h> -#include <sys/kmem.h> -#include <sys/uio.h> -#include <sys/errno.h> -#include <sys/stat.h> -#include <sys/cred.h> -#include <sys/dirent.h> -#include <sys/pathname.h> -#include <sys/vmsystm.h> -#include <sys/map.h> -#include <sys/fs/xmem.h> -#include <sys/mman.h> -#include <vm/hat.h> -#include <vm/seg.h> -#include <vm/as.h> -#include <vm/page.h> -#include <vm/pvn.h> -#include <sys/cmn_err.h> -#include <sys/debug.h> -#include <sys/swap.h> -#include <sys/buf.h> -#include <sys/vm.h> -#include <sys/vtrace.h> -#include <sys/policy.h> -#include <fs/fs_subr.h> - -static int xmem_getapage(struct vnode *, u_offset_t, size_t, uint_t *, - page_t **, size_t, struct seg *, caddr_t, enum seg_rw, struct cred *); - -#ifndef lint -static int xmem_putapage(struct vnode *, page_t *, u_offset_t *, size_t *, - int, struct cred *); -#endif - - -/* ARGSUSED1 */ -static int -xmem_open(struct vnode **vpp, int flag, struct cred *cred) -{ - /* - * swapon to a xmemfs file is not supported so access - * is denied on open if VISSWAP is set. - */ - if ((*vpp)->v_flag & VISSWAP) - return (EINVAL); - return (0); -} - -/* ARGSUSED1 */ -static int -xmem_close(struct vnode *vp, int flag, int count, offset_t offset, - struct cred *cred) -{ - cleanlocks(vp, ttoproc(curthread)->p_pid, 0); - cleanshares(vp, ttoproc(curthread)->p_pid); - return (0); -} - - -/* - * wrxmem does the real work of write requests for xmemfs. 
- */ -static int -wrxmem(struct xmount *xm, struct xmemnode *xp, struct uio *uio, - struct cred *cr, struct caller_context *ct) -{ - uint_t blockoffset; /* offset in the block */ - uint_t blkwr; /* offset in blocks into xmem file */ - uint_t blkcnt; - caddr_t base; - ssize_t bytes; /* bytes to uiomove */ - struct vnode *vp; - int error = 0; - size_t bsize = xm->xm_bsize; - rlim64_t limit = uio->uio_llimit; - long oresid = uio->uio_resid; - timestruc_t now; - offset_t offset; - - /* - * xp->xn_size is incremented before the uiomove - * is done on a write. If the move fails (bad user - * address) reset xp->xn_size. - * The better way would be to increment xp->xn_size - * only if the uiomove succeeds. - */ - long xn_size_changed = 0; - offset_t old_xn_size; - - vp = XNTOV(xp); - ASSERT(vp->v_type == VREG); - - XMEMPRINTF(1, ("wrxmem: vp %p resid %lx off %llx\n", - (void *)vp, uio->uio_resid, uio->uio_loffset)); - - ASSERT(RW_WRITE_HELD(&xp->xn_contents)); - ASSERT(RW_WRITE_HELD(&xp->xn_rwlock)); - - if (MANDLOCK(vp, xp->xn_mode)) { - rw_exit(&xp->xn_contents); - /* - * xmem_getattr ends up being called by chklock - */ - error = chklock(vp, FWRITE, - uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct); - - rw_enter(&xp->xn_contents, RW_WRITER); - if (error != 0) { - XMEMPRINTF(8, ("wrxmem: vp %p error %x\n", - (void *)vp, error)); - return (error); - } - } - - if ((offset = uio->uio_loffset) < 0) - return (EINVAL); - - if (offset >= limit) { - proc_t *p = ttoproc(curthread); - - mutex_enter(&p->p_lock); - (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls, - p, RCA_UNSAFE_SIGINFO); - mutex_exit(&p->p_lock); - return (EFBIG); - } - - if (uio->uio_resid == 0) { - XMEMPRINTF(8, ("wrxmem: vp %p resid %lx\n", - (void *)vp, uio->uio_resid)); - return (0); - } - - /* - * Get the highest blocknumber and allocate page array if needed. - * Note that if xm_bsize != PAGESIZE, each ppa[] is pointer to - * a page array rather than just a page. 
- */ - blkcnt = howmany((offset + uio->uio_resid), bsize); - blkwr = offset >> xm->xm_bshift; /* write begins here */ - - XMEMPRINTF(1, ("wrxmem: vp %p blkcnt %x blkwr %x xn_ppasz %lx\n", - (void *)vp, blkcnt, blkwr, xp->xn_ppasz)); - - /* file size increase */ - if (xp->xn_ppasz < blkcnt) { - - page_t ***ppa; - int ppasz; - uint_t blksinfile = howmany(xp->xn_size, bsize); - - /* - * check if sufficient blocks available for the given offset. - */ - if (blkcnt - blksinfile > xm->xm_max - xm->xm_mem) - return (ENOSPC); - - /* - * to prevent reallocating every time the file grows by a - * single block, double the size of the array. - */ - if (blkcnt < xp->xn_ppasz * 2) - ppasz = xp->xn_ppasz * 2; - else - ppasz = blkcnt; - - - ppa = kmem_zalloc(ppasz * sizeof (page_t **), KM_SLEEP); - - ASSERT(ppa); - - if (xp->xn_ppasz) { - bcopy(xp->xn_ppa, ppa, blksinfile * sizeof (*ppa)); - kmem_free(xp->xn_ppa, xp->xn_ppasz * sizeof (*ppa)); - } - xp->xn_ppa = ppa; - xp->xn_ppasz = ppasz; - - /* - * fill in the 'hole' if write offset beyond file size. This - * helps in creating large files quickly; an application can - * lseek to a large offset and perform a single write - * operation to create the large file. - */ - - if (blksinfile < blkwr) { - - old_xn_size = xp->xn_size; - xp->xn_size = (offset_t)blkwr * bsize; - - XMEMPRINTF(4, ("wrxmem: fill vp %p blks %x to %x\n", - (void *)vp, blksinfile, blkcnt - 1)); - error = xmem_fillpages(xp, vp, - (offset_t)blksinfile * bsize, - (offset_t)(blkcnt - blksinfile) * bsize, 1); - if (error) { - /* truncate file back to original size */ - (void) xmemnode_trunc(xm, xp, old_xn_size); - return (error); - } - /* - * if error on blkwr, this allows truncation of the - * filled hole. 
- */ - xp->xn_size = old_xn_size; - } - } - - do { - offset_t pagestart, pageend; - page_t **ppp; - - blockoffset = (uint_t)offset & (bsize - 1); - /* - * A maximum of xm->xm_bsize bytes of data is transferred - * each pass through this loop - */ - bytes = MIN(bsize - blockoffset, uio->uio_resid); - - ASSERT(bytes); - - if (offset + bytes >= limit) { - if (offset >= limit) { - error = EFBIG; - goto out; - } - bytes = limit - offset; - } - - - if (!xp->xn_ppa[blkwr]) { - /* zero fill new pages - simplify partial updates */ - error = xmem_fillpages(xp, vp, offset, bytes, 1); - if (error) - return (error); - } - - /* grow the file to the new length */ - if (offset + bytes > xp->xn_size) { - xn_size_changed = 1; - old_xn_size = xp->xn_size; - xp->xn_size = offset + bytes; - } - -#ifdef LOCKNEST - xmem_getpage(); -#endif - - /* xn_ppa[] is a page_t * if ppb == 1 */ - if (xm->xm_ppb == 1) - ppp = (page_t **)&xp->xn_ppa[blkwr]; - else - ppp = &xp->xn_ppa[blkwr][btop(blockoffset)]; - - pagestart = offset & ~(offset_t)(PAGESIZE - 1); - /* - * subtract 1 in case (offset + bytes) is mod PAGESIZE - * so that pageend is the actual index of last page. - */ - pageend = (offset + bytes - 1) & ~(offset_t)(PAGESIZE - 1); - - base = segxmem_getmap(xm->xm_map, vp, - pagestart, pageend - pagestart + PAGESIZE, - ppp, S_WRITE); - - rw_exit(&xp->xn_contents); - - error = uiomove(base + (offset - pagestart), bytes, - UIO_WRITE, uio); - segxmem_release(xm->xm_map, base, - pageend - pagestart + PAGESIZE); - - /* - * Re-acquire contents lock. - */ - rw_enter(&xp->xn_contents, RW_WRITER); - /* - * If the uiomove failed, fix up xn_size. - */ - if (error) { - if (xn_size_changed) { - /* - * The uiomove failed, and we - * allocated blocks,so get rid - * of them. 
- */ - (void) xmemnode_trunc(xm, xp, old_xn_size); - } - } else { - if ((xp->xn_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) && - (xp->xn_mode & (S_ISUID | S_ISGID)) && - secpolicy_vnode_setid_retain(cr, - (xp->xn_mode & S_ISUID) != 0 && xp->xn_uid == 0) - != 0) { - - /* - * Clear Set-UID & Set-GID bits on - * successful write if not privileged - * and at least one of the execute bits - * is set. If we always clear Set-GID, - * mandatory file and record locking is - * unuseable. - */ - xp->xn_mode &= ~(S_ISUID | S_ISGID); - } - gethrestime(&now); - xp->xn_mtime = now; - xp->xn_ctime = now; - } - offset = uio->uio_loffset; /* uiomove sets uio_loffset */ - blkwr++; - } while (error == 0 && uio->uio_resid > 0 && bytes != 0); - -out: - /* - * If we've already done a partial-write, terminate - * the write but return no error. - */ - if (oresid != uio->uio_resid) - error = 0; - return (error); -} - -/* - * rdxmem does the real work of read requests for xmemfs. - */ -static int -rdxmem( - struct xmount *xm, - struct xmemnode *xp, - struct uio *uio, - struct caller_context *ct) -{ - ulong_t blockoffset; /* offset in xmemfs file (uio_offset) */ - caddr_t base; - ssize_t bytes; /* bytes to uiomove */ - struct vnode *vp; - int error; - uint_t blocknumber; - long oresid = uio->uio_resid; - size_t bsize = xm->xm_bsize; - offset_t offset; - - vp = XNTOV(xp); - - XMEMPRINTF(1, ("rdxmem: vp %p\n", (void *)vp)); - - ASSERT(RW_LOCK_HELD(&xp->xn_contents)); - - if (MANDLOCK(vp, xp->xn_mode)) { - rw_exit(&xp->xn_contents); - /* - * xmem_getattr ends up being called by chklock - */ - error = chklock(vp, FREAD, - uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct); - rw_enter(&xp->xn_contents, RW_READER); - if (error != 0) { - XMEMPRINTF(1, - ("rdxmem: vp %p error %x\n", (void *)vp, error)); - return (error); - } - } - ASSERT(xp->xn_type == VREG); - - if ((offset = uio->uio_loffset) >= MAXOFF_T) { - XMEMPRINTF(1, ("rdxmem: vp %p bad offset %llx\n", - (void *)vp, uio->uio_loffset)); - return 
(0); - } - if (offset < 0) - return (EINVAL); - - if (uio->uio_resid == 0) { - XMEMPRINTF(1, ("rdxmem: vp %p resid 0\n", (void *)vp)); - return (0); - } - - blocknumber = offset >> xm->xm_bshift; - do { - offset_t diff, pagestart, pageend; - uint_t pageinblock; - - blockoffset = offset & (bsize - 1); - /* - * A maximum of xm->xm_bsize bytes of data is transferred - * each pass through this loop - */ - bytes = MIN(bsize - blockoffset, uio->uio_resid); - - diff = xp->xn_size - offset; - - if (diff <= 0) { - error = 0; - goto out; - } - if (diff < bytes) - bytes = diff; - - if (!xp->xn_ppa[blocknumber]) - if (error = xmem_fillpages(xp, vp, offset, bytes, 1)) { - return (error); - } - /* - * We have to drop the contents lock to prevent the VM - * system from trying to reacquire it in xmem_getpage() - * should the uiomove cause a pagefault. - */ - rw_exit(&xp->xn_contents); - -#ifdef LOCKNEST - xmem_getpage(); -#endif - - /* 2/10 panic in hat_memload_array - len & MMU_OFFSET */ - - pagestart = offset & ~(offset_t)(PAGESIZE - 1); - pageend = (offset + bytes - 1) & ~(offset_t)(PAGESIZE - 1); - if (xm->xm_ppb == 1) - base = segxmem_getmap(xm->xm_map, vp, - pagestart, pageend - pagestart + PAGESIZE, - (page_t **)&xp->xn_ppa[blocknumber], S_READ); - else { - pageinblock = btop(blockoffset); - base = segxmem_getmap(xm->xm_map, vp, - pagestart, pageend - pagestart + PAGESIZE, - &xp->xn_ppa[blocknumber][pageinblock], S_READ); - - } - error = uiomove(base + (blockoffset & (PAGESIZE - 1)), - bytes, UIO_READ, uio); - - segxmem_release(xm->xm_map, base, - pageend - pagestart + PAGESIZE); - /* - * Re-acquire contents lock. - */ - rw_enter(&xp->xn_contents, RW_READER); - - offset = uio->uio_loffset; - blocknumber++; - } while (error == 0 && uio->uio_resid > 0); - -out: - gethrestime(&xp->xn_atime); - - /* - * If we've already done a partial read, terminate - * the read but return no error. 
- */ - if (oresid != uio->uio_resid) - error = 0; - - return (error); -} - -/* ARGSUSED2 */ -static int -xmem_read(struct vnode *vp, struct uio *uiop, int ioflag, cred_t *cred, - struct caller_context *ct) -{ - struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); - struct xmount *xm = (struct xmount *)VTOXM(vp); - int error; - - /* - * We don't currently support reading non-regular files - */ - if (vp->v_type != VREG) - return (EINVAL); - /* - * xmem_rwlock should have already been called from layers above - */ - ASSERT(RW_READ_HELD(&xp->xn_rwlock)); - - rw_enter(&xp->xn_contents, RW_READER); - - error = rdxmem(xm, xp, uiop, ct); - - rw_exit(&xp->xn_contents); - - return (error); -} - -static int -xmem_write(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cred, - struct caller_context *ct) -{ - struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); - struct xmount *xm = (struct xmount *)VTOXM(vp); - int error; - - /* - * We don't currently support writing to non-regular files - */ - if (vp->v_type != VREG) - return (EINVAL); /* XXX EISDIR? */ - - /* - * xmem_rwlock should have already been called from layers above - */ - ASSERT(RW_WRITE_HELD(&xp->xn_rwlock)); - - rw_enter(&xp->xn_contents, RW_WRITER); - - if (ioflag & FAPPEND) { - /* - * In append mode start at end of file. 
- */ - uiop->uio_loffset = xp->xn_size; - } - - error = wrxmem(xm, xp, uiop, cred, ct); - - rw_exit(&xp->xn_contents); - - return (error); -} - -/* ARGSUSED */ -static int -xmem_ioctl(struct vnode *vp, int com, intptr_t data, int flag, - struct cred *cred, int *rvalp) -{ - return (ENOTTY); -} - -/* ARGSUSED2 */ -static int -xmem_getattr(struct vnode *vp, struct vattr *vap, int flags, struct cred *cred) -{ - struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); - struct xmount *xm = (struct xmount *)VTOXM(vp); - - mutex_enter(&xp->xn_tlock); - - *vap = xp->xn_attr; - - vap->va_mode = xp->xn_mode & MODEMASK; - vap->va_type = vp->v_type; - vap->va_blksize = xm->xm_bsize; - vap->va_nblocks = (fsblkcnt64_t)btodb(ptob(btopr(vap->va_size))); - - mutex_exit(&xp->xn_tlock); - return (0); -} - -/*ARGSUSED*/ -static int -xmem_setattr(struct vnode *vp, struct vattr *vap, int flags, struct cred *cred, - caller_context_t *ct) -{ - struct xmount *xm = (struct xmount *)VTOXM(vp); - struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); - int error; - struct vattr *get; - register long int mask = vap->va_mask; - - /* - * Cannot set these attributes - */ - if (mask & AT_NOSET) - return (EINVAL); - - mutex_enter(&xp->xn_tlock); - - get = &xp->xn_attr; - - error = secpolicy_vnode_setattr(cred, vp, vap, get, flags, - xmem_xaccess, xp); - - if (error != 0) - goto out; - - mask = vap->va_mask; - - /* - * Change file access modes. 
- */ - if (mask & AT_MODE) { - /* prevent execute permission to be set for regular files */ - if (S_ISREG(get->va_mode)) - vap->va_mode &= ~(S_IXUSR | S_IXGRP | S_IXOTH); - - XMEMPRINTF(1, ("xmem_setattr: va_mode old %x new %x\n", - get->va_mode, vap->va_mode)); - - get->va_mode &= S_IFMT; - get->va_mode |= vap->va_mode & ~S_IFMT; - } - - if (mask & AT_UID) - get->va_uid = vap->va_uid; - if (mask & AT_GID) - get->va_gid = vap->va_gid; - if (mask & AT_ATIME) - get->va_atime = vap->va_atime; - if (mask & AT_MTIME) - get->va_mtime = vap->va_mtime; - if (mask & (AT_UID | AT_GID | AT_MODE | AT_MTIME)) - gethrestime(&get->va_ctime); - - if (mask & AT_SIZE) { - if (vp->v_type == VDIR) { - error = EISDIR; - goto out; - } - /* Don't support large files. */ - if (vap->va_size > MAXOFF_T) { - error = EFBIG; - goto out; - } - if (error = xmem_xaccess(xp, VWRITE, cred)) - goto out; - mutex_exit(&xp->xn_tlock); - - rw_enter(&xp->xn_rwlock, RW_WRITER); - rw_enter(&xp->xn_contents, RW_WRITER); - error = xmemnode_trunc(xm, xp, vap->va_size); - rw_exit(&xp->xn_contents); - rw_exit(&xp->xn_rwlock); - goto out1; - } -out: - mutex_exit(&xp->xn_tlock); -out1: - return (error); -} - -/* ARGSUSED2 */ -static int -xmem_access(struct vnode *vp, int mode, int flags, struct cred *cred) -{ - struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); - int error; - - mutex_enter(&xp->xn_tlock); - error = xmem_xaccess(xp, mode, cred); - mutex_exit(&xp->xn_tlock); - return (error); -} - -/* ARGSUSED3 */ -static int -xmem_lookup(struct vnode *dvp, char *nm, struct vnode **vpp, - struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred) -{ - struct xmemnode *xp = (struct xmemnode *)VTOXN(dvp); - struct xmemnode *nxp = NULL; - int error; - - /* - * Null component name is a synonym for directory being searched. 
- */ - if (*nm == '\0') { - VN_HOLD(dvp); - *vpp = dvp; - return (0); - } - ASSERT(xp); - - error = xdirlookup(xp, nm, &nxp, cred); - - if (error == 0) { - ASSERT(nxp); - *vpp = XNTOV(nxp); - /* - * If vnode is a device return special vnode instead - */ - if (IS_DEVVP(*vpp)) { - struct vnode *newvp; - - newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, - cred); - VN_RELE(*vpp); - *vpp = newvp; - } - } - return (error); -} - -/*ARGSUSED7*/ -static int -xmem_create(struct vnode *dvp, char *nm, struct vattr *vap, - enum vcexcl exclusive, int mode, struct vnode **vpp, struct cred *cred, - int flag) -{ - struct xmemnode *parent; - struct xmount *xm; - struct xmemnode *self; - int error; - struct xmemnode *oldxp; - -again: - parent = (struct xmemnode *)VTOXN(dvp); - xm = (struct xmount *)VTOXM(dvp); - self = NULL; - error = 0; - oldxp = NULL; - - if (vap->va_type == VREG && (vap->va_mode & VSVTX)) { - /* Must be privileged to set sticky bit */ - if (secpolicy_vnode_stky_modify(cred) != 0) - vap->va_mode &= ~VSVTX; - } else if (vap->va_type == VNON) { - return (EINVAL); - } - - /* - * Null component name is a synonym for directory being searched. 
- */ - if (*nm == '\0') { - VN_HOLD(dvp); - oldxp = parent; - } else { - error = xdirlookup(parent, nm, &oldxp, cred); - } - - if (error == 0) { /* name found */ - ASSERT(oldxp); - - rw_enter(&oldxp->xn_rwlock, RW_WRITER); - - /* - * if create/read-only an existing - * directory, allow it - */ - if (exclusive == EXCL) - error = EEXIST; - else if ((oldxp->xn_type == VDIR) && (mode & VWRITE)) - error = EISDIR; - else { - error = xmem_xaccess(oldxp, mode, cred); - } - - if (error) { - rw_exit(&oldxp->xn_rwlock); - xmemnode_rele(oldxp); - return (error); - } - *vpp = XNTOV(oldxp); - if ((*vpp)->v_type == VREG && (vap->va_mask & AT_SIZE) && - vap->va_size == 0) { - rw_enter(&oldxp->xn_contents, RW_WRITER); - (void) xmemnode_trunc(xm, oldxp, 0); - rw_exit(&oldxp->xn_contents); - } - rw_exit(&oldxp->xn_rwlock); - if (IS_DEVVP(*vpp)) { - struct vnode *newvp; - - newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, - cred); - VN_RELE(*vpp); - *vpp = newvp; - } - return (0); - } - - if (error != ENOENT) - return (error); - - rw_enter(&parent->xn_rwlock, RW_WRITER); - error = xdirenter(xm, parent, nm, DE_CREATE, - (struct xmemnode *)NULL, (struct xmemnode *)NULL, - vap, &self, cred); - rw_exit(&parent->xn_rwlock); - - if (error) { - if (self) - xmemnode_rele(self); - - if (error == EEXIST) { - /* - * This means that the file was created sometime - * after we checked and did not find it and when - * we went to create it. - * Since creat() is supposed to truncate a file - * that already exits go back to the begining - * of the function. 
This time we will find it - * and go down the xmem_trunc() path - */ - goto again; - } - return (error); - } - - *vpp = XNTOV(self); - - if (IS_DEVVP(*vpp)) { - struct vnode *newvp; - - newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, - cred); - VN_RELE(*vpp); - *vpp = newvp; - } - - return (0); -} - -static int -xmem_remove(struct vnode *dvp, char *nm, struct cred *cred) -{ - struct xmemnode *parent = (struct xmemnode *)VTOXN(dvp); - int error; - struct xmemnode *xp = NULL; - - error = xdirlookup(parent, nm, &xp, cred); - if (error) - return (error); - - ASSERT(xp); - rw_enter(&parent->xn_rwlock, RW_WRITER); - rw_enter(&xp->xn_rwlock, RW_WRITER); - - if (xp->xn_type != VDIR || - (error = secpolicy_fs_linkdir(cred, dvp->v_vfsp)) == 0) - error = xdirdelete(parent, xp, nm, DR_REMOVE, cred); - - rw_exit(&xp->xn_rwlock); - rw_exit(&parent->xn_rwlock); - xmemnode_rele(xp); - - return (error); -} - -static int -xmem_link(struct vnode *dvp, struct vnode *srcvp, char *tnm, struct cred *cred) -{ - struct xmemnode *parent; - struct xmemnode *from; - struct xmount *xm = (struct xmount *)VTOXM(dvp); - int error; - struct xmemnode *found = NULL; - struct vnode *realvp; - - if (VOP_REALVP(srcvp, &realvp) == 0) - srcvp = realvp; - - parent = (struct xmemnode *)VTOXN(dvp); - from = (struct xmemnode *)VTOXN(srcvp); - - if ((srcvp->v_type == VDIR && - secpolicy_fs_linkdir(cred, dvp->v_vfsp) != 0) || - (from->xn_uid != crgetuid(cred) && secpolicy_basic_link(cred) != 0)) - return (EPERM); - - error = xdirlookup(parent, tnm, &found, cred); - if (error == 0) { - ASSERT(found); - xmemnode_rele(found); - return (EEXIST); - } - - if (error != ENOENT) - return (error); - - rw_enter(&parent->xn_rwlock, RW_WRITER); - error = xdirenter(xm, parent, tnm, DE_LINK, (struct xmemnode *)NULL, - from, NULL, (struct xmemnode **)NULL, cred); - rw_exit(&parent->xn_rwlock); - return (error); -} - -static int -xmem_rename( - struct vnode *odvp, /* source parent vnode */ - char *onm, /* source name */ - 
struct vnode *ndvp, /* destination parent vnode */ - char *nnm, /* destination name */ - struct cred *cred) -{ - struct xmemnode *fromparent; - struct xmemnode *toparent; - struct xmemnode *fromxp = NULL; /* source xmemnode */ - struct xmount *xm = (struct xmount *)VTOXM(odvp); - int error; - int samedir = 0; /* set if odvp == ndvp */ - struct vnode *realvp; - - if (VOP_REALVP(ndvp, &realvp) == 0) - ndvp = realvp; - - fromparent = (struct xmemnode *)VTOXN(odvp); - toparent = (struct xmemnode *)VTOXN(ndvp); - - mutex_enter(&xm->xm_renamelck); - - /* - * Look up xmemnode of file we're supposed to rename. - */ - error = xdirlookup(fromparent, onm, &fromxp, cred); - if (error) { - mutex_exit(&xm->xm_renamelck); - return (error); - } - - /* - * Make sure we can delete the old (source) entry. This - * requires write permission on the containing directory. If - * that directory is "sticky" it further requires (except for - * for privileged users) that the user own the directory or - * the source entry, or else have permission to write the - * source entry. - */ - if (((error = xmem_xaccess(fromparent, VWRITE, cred)) != 0) || - (error = xmem_sticky_remove_access(fromparent, fromxp, cred)) != 0) - goto done; - - /* - * Check for renaming to or from '.' or '..' or that - * fromxp == fromparent - */ - if ((onm[0] == '.' && - (onm[1] == '\0' || (onm[1] == '.' && onm[2] == '\0'))) || - (nnm[0] == '.' && - (nnm[1] == '\0' || (nnm[1] == '.' && nnm[2] == '\0'))) || - (fromparent == fromxp)) { - error = EINVAL; - goto done; - } - - samedir = (fromparent == toparent); - /* - * Make sure we can search and rename into the new - * (destination) directory. 
- */ - if (!samedir) { - error = xmem_xaccess(toparent, VEXEC|VWRITE, cred); - if (error) - goto done; - } - - /* - * Link source to new target - */ - rw_enter(&toparent->xn_rwlock, RW_WRITER); - error = xdirenter(xm, toparent, nnm, DE_RENAME, - fromparent, fromxp, (struct vattr *)NULL, - (struct xmemnode **)NULL, cred); - rw_exit(&toparent->xn_rwlock); - - if (error) { - /* - * ESAME isn't really an error; it indicates that the - * operation should not be done because the source and target - * are the same file, but that no error should be reported. - */ - if (error == ESAME) - error = 0; - goto done; - } - - /* - * Unlink from source. - */ - rw_enter(&fromparent->xn_rwlock, RW_WRITER); - rw_enter(&fromxp->xn_rwlock, RW_WRITER); - - error = xdirdelete(fromparent, fromxp, onm, DR_RENAME, cred); - - /* - * The following handles the case where our source xmemnode was - * removed before we got to it. - * - * XXX We should also cleanup properly in the case where xdirdelete - * fails for some other reason. Currently this case shouldn't happen. - * (see 1184991). - */ - if (error == ENOENT) - error = 0; - - rw_exit(&fromxp->xn_rwlock); - rw_exit(&fromparent->xn_rwlock); -done: - xmemnode_rele(fromxp); - mutex_exit(&xm->xm_renamelck); - - return (error); -} - -static int -xmem_mkdir(struct vnode *dvp, char *nm, struct vattr *va, struct vnode **vpp, - struct cred *cred) -{ - struct xmemnode *parent = (struct xmemnode *)VTOXN(dvp); - struct xmemnode *self = NULL; - struct xmount *xm = (struct xmount *)VTOXM(dvp); - int error; - - /* - * Might be dangling directory. Catch it here, - * because a ENOENT return from xdirlookup() is - * an "o.k. return". 
- */ - if (parent->xn_nlink == 0) - return (ENOENT); - - error = xdirlookup(parent, nm, &self, cred); - if (error == 0) { - ASSERT(self); - xmemnode_rele(self); - return (EEXIST); - } - if (error != ENOENT) - return (error); - - rw_enter(&parent->xn_rwlock, RW_WRITER); - error = xdirenter(xm, parent, nm, DE_MKDIR, - (struct xmemnode *)NULL, (struct xmemnode *)NULL, va, - &self, cred); - if (error) { - rw_exit(&parent->xn_rwlock); - if (self) - xmemnode_rele(self); - return (error); - } - rw_exit(&parent->xn_rwlock); - *vpp = XNTOV(self); - return (0); -} - -static int -xmem_rmdir(struct vnode *dvp, char *nm, struct vnode *cdir, struct cred *cred) -{ - struct xmemnode *parent = (struct xmemnode *)VTOXN(dvp); - struct xmemnode *self = NULL; - struct vnode *vp; - int error = 0; - - /* - * Return error when removing . and .. - */ - if (strcmp(nm, ".") == 0) - return (EINVAL); - if (strcmp(nm, "..") == 0) - return (EEXIST); /* Should be ENOTEMPTY */ - error = xdirlookup(parent, nm, &self, cred); - if (error) - return (error); - - rw_enter(&parent->xn_rwlock, RW_WRITER); - rw_enter(&self->xn_rwlock, RW_WRITER); - - vp = XNTOV(self); - if (vp == dvp || vp == cdir) { - error = EINVAL; - goto done1; - } - if (self->xn_type != VDIR) { - error = ENOTDIR; - goto done1; - } - - mutex_enter(&self->xn_tlock); - if (self->xn_nlink > 2) { - mutex_exit(&self->xn_tlock); - error = EEXIST; - goto done1; - } - mutex_exit(&self->xn_tlock); - - if (vn_vfswlock(vp)) { - error = EBUSY; - goto done1; - } - if (vn_mountedvfs(vp) != NULL) { - error = EBUSY; - goto done; - } - - /* - * Check for an empty directory - * i.e. only includes entries for "." and ".." 
- */ - if (self->xn_dirents > 2) { - error = EEXIST; /* SIGH should be ENOTEMPTY */ - /* - * Update atime because checking xn_dirents is logically - * equivalent to reading the directory - */ - gethrestime(&self->xn_atime); - goto done; - } - - error = xdirdelete(parent, self, nm, DR_RMDIR, cred); -done: - vn_vfsunlock(vp); -done1: - rw_exit(&self->xn_rwlock); - rw_exit(&parent->xn_rwlock); - xmemnode_rele(self); - - return (error); -} - -/* ARGSUSED2 */ - -static int -xmem_readdir(struct vnode *vp, struct uio *uiop, struct cred *cred, int *eofp) -{ - struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); - struct xdirent *xdp; - int error; - register struct dirent64 *dp; - register ulong_t offset; - register ulong_t total_bytes_wanted; - register long outcount = 0; - register long bufsize; - int reclen; - caddr_t outbuf; - - if (uiop->uio_loffset >= MAXOFF_T) { - if (eofp) - *eofp = 1; - return (0); - } - /* - * assuming system call has already called xmem_rwlock - */ - ASSERT(RW_READ_HELD(&xp->xn_rwlock)); - - if (uiop->uio_iovcnt != 1) - return (EINVAL); - - if (vp->v_type != VDIR) - return (ENOTDIR); - - /* - * There's a window here where someone could have removed - * all the entries in the directory after we put a hold on the - * vnode but before we grabbed the rwlock. Just return unless - * there are still references to the current file in which case panic. 
- */ - if (xp->xn_dir == NULL) { - if (xp->xn_nlink) - cmn_err(CE_PANIC, "empty directory 0x%p", (void *)xp); - return (0); - } - - /* - * Get space for multiple directory entries - */ - total_bytes_wanted = uiop->uio_iov->iov_len; - bufsize = total_bytes_wanted + sizeof (struct dirent64); - outbuf = kmem_alloc(bufsize, KM_SLEEP); - - dp = (struct dirent64 *)outbuf; - - - offset = 0; - xdp = xp->xn_dir; - while (xdp) { - offset = xdp->xd_offset; - if (offset >= uiop->uio_offset) { - reclen = (int)DIRENT64_RECLEN(strlen(xdp->xd_name)); - if (outcount + reclen > total_bytes_wanted) - break; - ASSERT(xdp->xd_xmemnode != NULL); - - /* use strncpy(9f) to zero out uninitialized bytes */ - - ASSERT(strlen(xdp->xd_name) + 1 <= - DIRENT64_NAMELEN(reclen)); - (void) strncpy(dp->d_name, xdp->xd_name, - DIRENT64_NAMELEN(reclen)); - dp->d_reclen = (ushort_t)reclen; - dp->d_ino = (ino64_t)xdp->xd_xmemnode->xn_nodeid; - dp->d_off = (offset_t)xdp->xd_offset + 1; - dp = (struct dirent64 *) - ((uintptr_t)dp + dp->d_reclen); - outcount += reclen; - ASSERT(outcount <= bufsize); - } - xdp = xdp->xd_next; - } - error = uiomove(outbuf, outcount, UIO_READ, uiop); - if (!error) { - /* If we reached the end of the list our offset */ - /* should now be just past the end. 
*/ - if (!xdp) { - offset += 1; - if (eofp) - *eofp = 1; - } else if (eofp) - *eofp = 0; - uiop->uio_offset = offset; - } - gethrestime(&xp->xn_atime); - kmem_free(outbuf, bufsize); - return (error); -} - -static int -xmem_symlink(struct vnode *dvp, char *lnm, struct vattr *tva, char *tnm, - struct cred *cred) -{ - struct xmemnode *parent = (struct xmemnode *)VTOXN(dvp); - struct xmemnode *self = (struct xmemnode *)NULL; - struct xmount *xm = (struct xmount *)VTOXM(dvp); - char *cp = NULL; - int error; - size_t len; - - error = xdirlookup(parent, lnm, &self, cred); - if (error == 0) { - /* - * The entry already exists - */ - xmemnode_rele(self); - return (EEXIST); /* was 0 */ - } - - if (error != ENOENT) { - if (self != NULL) - xmemnode_rele(self); - return (error); - } - - rw_enter(&parent->xn_rwlock, RW_WRITER); - error = xdirenter(xm, parent, lnm, DE_CREATE, (struct xmemnode *)NULL, - (struct xmemnode *)NULL, tva, &self, cred); - rw_exit(&parent->xn_rwlock); - - if (error) { - if (self) - xmemnode_rele(self); - return (error); - } - len = strlen(tnm) + 1; - cp = xmem_memalloc(len, 0); - if (cp == NULL) { - xmemnode_rele(self); - return (ENOSPC); - } - (void) strcpy(cp, tnm); - - self->xn_symlink = cp; - self->xn_size = len - 1; - xmemnode_rele(self); - return (error); -} - -/* ARGSUSED2 */ -static int -xmem_readlink(struct vnode *vp, struct uio *uiop, struct cred *cred) -{ - struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); - int error = 0; - - if (vp->v_type != VLNK) - return (EINVAL); - - rw_enter(&xp->xn_rwlock, RW_READER); - rw_enter(&xp->xn_contents, RW_READER); - error = uiomove(xp->xn_symlink, xp->xn_size, UIO_READ, uiop); - gethrestime(&xp->xn_atime); - rw_exit(&xp->xn_contents); - rw_exit(&xp->xn_rwlock); - return (error); -} - -/* ARGSUSED */ -static int -xmem_fsync(struct vnode *vp, int syncflag, struct cred *cred) -{ - return (0); -} - -/* ARGSUSED */ -static void -xmem_inactive(struct vnode *vp, struct cred *cred) -{ - struct xmemnode *xp = 
(struct xmemnode *)VTOXN(vp); - struct xmount *xm = (struct xmount *)VFSTOXM(vp->v_vfsp); - - rw_enter(&xp->xn_rwlock, RW_WRITER); -top: - mutex_enter(&xp->xn_tlock); - mutex_enter(&vp->v_lock); - ASSERT(vp->v_count >= 1); - - /* - * If we don't have the last hold or the link count is non-zero, - * there's little to do -- just drop our hold. - */ - if (vp->v_count > 1 || xp->xn_nlink != 0) { - vp->v_count--; - mutex_exit(&vp->v_lock); - mutex_exit(&xp->xn_tlock); - rw_exit(&xp->xn_rwlock); - return; - } - - /* - * We have the last hold *and* the link count is zero, so this - * xmemnode is dead from the filesystem's viewpoint. However, - * if the xmemnode has any pages associated with it (i.e. if it's - * a normal file with non-zero size), the xmemnode can still be - * discovered by pageout or fsflush via the page vnode pointers. - * In this case we must drop all our locks, truncate the xmemnode, - * and try the whole dance again. - */ - if (xp->xn_size != 0) { - if (xp->xn_type == VREG) { - mutex_exit(&vp->v_lock); - mutex_exit(&xp->xn_tlock); - rw_enter(&xp->xn_contents, RW_WRITER); - (void) xmemnode_trunc(xm, xp, 0); - rw_exit(&xp->xn_contents); - ASSERT(xp->xn_size == 0); - ASSERT(xp->xn_nblocks == 0); - goto top; - } - if (xp->xn_type == VLNK) - xmem_memfree(xp->xn_symlink, xp->xn_size + 1); - } - - mutex_exit(&vp->v_lock); - mutex_exit(&xp->xn_tlock); - mutex_enter(&xm->xm_contents); - if (xp->xn_forw == NULL) - xm->xm_rootnode->xn_back = xp->xn_back; - else - xp->xn_forw->xn_back = xp->xn_back; - xp->xn_back->xn_forw = xp->xn_forw; - mutex_exit(&xm->xm_contents); - rw_exit(&xp->xn_rwlock); - rw_destroy(&xp->xn_rwlock); - mutex_destroy(&xp->xn_tlock); - vn_free(xp->xn_vnode); - xmem_memfree(xp, sizeof (struct xmemnode)); -} - -static int -xmem_fid(struct vnode *vp, struct fid *fidp) -{ - struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); - struct xfid *xfid; - - if (fidp->fid_len < (sizeof (struct xfid) - sizeof (ushort_t))) { - fidp->fid_len = sizeof (struct 
xfid) - sizeof (ushort_t); - return (ENOSPC); - } - - xfid = (struct xfid *)fidp; - bzero(xfid, sizeof (struct xfid)); - xfid->xfid_len = (int)sizeof (struct xfid) - sizeof (ushort_t); - - xfid->xfid_ino = xp->xn_nodeid; - xfid->xfid_gen = xp->xn_gen; - - return (0); -} - - -/* - * Return all the pages from [off..off+len] in given file - */ -static int -xmem_getpage(struct vnode *vp, offset_t off, size_t len, uint_t *protp, - page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, - enum seg_rw rw, struct cred *cr) -{ - int err = 0; - struct xmemnode *xp = VTOXN(vp); - struct xmount *xm = (struct xmount *)VTOXM(vp); - timestruc_t now; - - cmn_err(CE_PANIC, "xmem_getpage"); - rw_enter(&xp->xn_contents, RW_READER); - - if (off + len > xp->xn_size + xm->xm_bsize) { - rw_exit(&xp->xn_contents); - return (EFAULT); - } - rw_exit(&xp->xn_contents); - - if (len <= xm->xm_bsize) - err = xmem_getapage(vp, (u_offset_t)off, len, protp, pl, plsz, - seg, addr, rw, cr); - else - err = pvn_getpages(xmem_getapage, vp, (u_offset_t)off, len, - protp, pl, plsz, seg, addr, rw, cr); - - rw_enter(&xp->xn_contents, RW_WRITER); - gethrestime(&now); - xp->xn_atime = now; - if (rw == S_WRITE) - xp->xn_mtime = now; - rw_exit(&xp->xn_contents); - - return (err); -} - -/* - * Called from pvn_getpages to get a particular page. - */ -/*ARGSUSED*/ -static int -xmem_getapage(struct vnode *vp, u_offset_t off, size_t len, uint_t *protp, - page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, - enum seg_rw rw, struct cred *cr) -{ - cmn_err(CE_PANIC, "xmem_getapage"); - return (0); -} - -/* ARGSUSED */ -int -xmem_putpage(struct vnode *vp, offset_t off, size_t len, int flags, - struct cred *cr) -{ - return (0); -} - -#ifndef lint -/* - * Write out a single page. - * For xmemfs this means choose a physical swap slot and write the page - * out using VOP_PAGEIO. 
For performance, we attempt to kluster; i.e., - * we try to find a bunch of other dirty pages adjacent in the file - * and a bunch of contiguous swap slots, and then write all the pages - * out in a single i/o. - */ -/*ARGSUSED*/ -static int -xmem_putapage(struct vnode *vp, page_t *pp, u_offset_t *offp, - size_t *lenp, int flags, struct cred *cr) -{ - cmn_err(CE_PANIC, "xmem putapage"); - return (1); -} -#endif - - -static int -xmem_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp, - size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, - struct cred *cred) -{ - struct seg *seg; - struct segxmem_crargs xmem_a; - struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); - struct xmount *xm = (struct xmount *)VTOXM(vp); - uint_t blocknumber; - int error; - -#ifdef lint - maxprot = maxprot; -#endif - if (vp->v_flag & VNOMAP) - return (ENOSYS); - - if (off < 0) - return (EINVAL); - - /* offset, length and address has to all be block aligned */ - - if (off & (xm->xm_bsize - 1) || len & (xm->xm_bsize - 1) || - ((ulong_t)*addrp) & (xm->xm_bsize - 1)) { - - return (EINVAL); - } - - if (vp->v_type != VREG) - return (ENODEV); - - if (flags & MAP_PRIVATE) - return (EINVAL); /* XXX need to be handled */ - - /* - * Don't allow mapping to locked file - */ - if (vn_has_mandatory_locks(vp, xp->xn_mode)) { - return (EAGAIN); - } - - if (error = xmem_fillpages(xp, vp, off, len, 1)) { - return (error); - } - - blocknumber = off >> xm->xm_bshift; - - if (flags & MAP_FIXED) { - /* - * User specified address - blow away any previous mappings - */ - AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); - seg = as_findseg(as, *addrp, 0); - - /* - * Fast path. segxmem_remap will fail if this is the wrong - * segment or if the len is beyond end of seg. If it fails, - * we do the regular stuff thru as_* routines. 
- */ - - if (seg && (segxmem_remap(seg, vp, *addrp, len, - &xp->xn_ppa[blocknumber], prot) == 0)) { - AS_LOCK_EXIT(as, &as->a_lock); - return (0); - } - AS_LOCK_EXIT(as, &as->a_lock); - if (seg) - (void) as_unmap(as, *addrp, len); - - as_rangelock(as); - - error = valid_usr_range(*addrp, len, prot, as, as->a_userlimit); - - if (error != RANGE_OKAY || - as_gap(as, len, addrp, &len, AH_CONTAIN, *addrp)) { - as_rangeunlock(as); - return (EINVAL); - } - - } else { - as_rangelock(as); - map_addr(addrp, len, (offset_t)off, 1, flags); - } - - if (*addrp == NULL) { - as_rangeunlock(as); - return (ENOMEM); - } - - xmem_a.xma_vp = vp; - xmem_a.xma_offset = (u_offset_t)off; - xmem_a.xma_prot = prot; - xmem_a.xma_cred = cred; - xmem_a.xma_ppa = &xp->xn_ppa[blocknumber]; - xmem_a.xma_bshift = xm->xm_bshift; - - error = as_map(as, *addrp, len, segxmem_create, &xmem_a); - - as_rangeunlock(as); - return (error); -} - -/* ARGSUSED */ -static int -xmem_addmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr, - size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, - struct cred *cred) -{ - return (0); -} - -/* ARGSUSED */ -static int -xmem_delmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr, - size_t len, uint_t prot, uint_t maxprot, uint_t flags, - struct cred *cred) -{ - return (0); -} - -static int -xmem_freesp(struct vnode *vp, struct flock64 *lp, int flag) -{ - register int i; - register struct xmemnode *xp = VTOXN(vp); - int error; - - ASSERT(vp->v_type == VREG); - ASSERT(lp->l_start >= 0); - - if (lp->l_len != 0) - return (EINVAL); - - rw_enter(&xp->xn_rwlock, RW_WRITER); - if (xp->xn_size == lp->l_start) { - rw_exit(&xp->xn_rwlock); - return (0); - } - - /* - * Check for any mandatory locks on the range - */ - if (MANDLOCK(vp, xp->xn_mode)) { - long save_start; - - save_start = lp->l_start; - - if (xp->xn_size < lp->l_start) { - /* - * "Truncate up" case: need to make sure there - * is no lock beyond current end-of-file. 
To - * do so, we need to set l_start to the size - * of the file temporarily. - */ - lp->l_start = xp->xn_size; - } - lp->l_type = F_WRLCK; - lp->l_sysid = 0; - lp->l_pid = ttoproc(curthread)->p_pid; - i = (flag & (FNDELAY|FNONBLOCK)) ? 0 : SLPFLCK; - if ((i = reclock(vp, lp, i, 0, lp->l_start, NULL)) != 0 || - lp->l_type != F_UNLCK) { - rw_exit(&xp->xn_rwlock); - return (i ? i : EAGAIN); - } - - lp->l_start = save_start; - } - - rw_enter(&xp->xn_contents, RW_WRITER); - error = xmemnode_trunc((struct xmount *)VFSTOXM(vp->v_vfsp), - xp, lp->l_start); - rw_exit(&xp->xn_contents); - rw_exit(&xp->xn_rwlock); - return (error); -} - -/* ARGSUSED */ -static int -xmem_space(struct vnode *vp, int cmd, struct flock64 *bfp, int flag, - offset_t offset, struct cred *cred, caller_context_t *ct) -{ - int error; - - if (cmd != F_FREESP) - return (EINVAL); - if ((error = convoff(vp, bfp, 0, (offset_t)offset)) == 0) { - if ((bfp->l_start > MAXOFF_T) || (bfp->l_len > MAXOFF_T)) - return (EFBIG); - error = xmem_freesp(vp, bfp, flag); - } - return (error); -} - -/* ARGSUSED */ -static int -xmem_seek(struct vnode *vp, offset_t ooff, offset_t *noffp) -{ - return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? 
EINVAL : 0); -} - -/* ARGSUSED2 */ -static int -xmem_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp) -{ - struct xmemnode *xp = VTOXN(vp); - - if (write_lock) { - rw_enter(&xp->xn_rwlock, RW_WRITER); - } else { - rw_enter(&xp->xn_rwlock, RW_READER); - } - return (write_lock); -} - -/* ARGSUSED1 */ -static void -xmem_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp) -{ - struct xmemnode *xp = VTOXN(vp); - - rw_exit(&xp->xn_rwlock); -} - -struct vnodeops *xmem_vnodeops; - -const fs_operation_def_t xmem_vnodeops_template[] = { - VOPNAME_OPEN, xmem_open, - VOPNAME_CLOSE, xmem_close, - VOPNAME_READ, xmem_read, - VOPNAME_WRITE, xmem_write, - VOPNAME_IOCTL, xmem_ioctl, - VOPNAME_GETATTR, xmem_getattr, - VOPNAME_SETATTR, xmem_setattr, - VOPNAME_ACCESS, xmem_access, - VOPNAME_LOOKUP, xmem_lookup, - VOPNAME_CREATE, xmem_create, - VOPNAME_REMOVE, xmem_remove, - VOPNAME_LINK, xmem_link, - VOPNAME_RENAME, xmem_rename, - VOPNAME_MKDIR, xmem_mkdir, - VOPNAME_RMDIR, xmem_rmdir, - VOPNAME_READDIR, xmem_readdir, - VOPNAME_SYMLINK, xmem_symlink, - VOPNAME_READLINK, xmem_readlink, - VOPNAME_FSYNC, xmem_fsync, - VOPNAME_INACTIVE, (fs_generic_func_p) xmem_inactive, - VOPNAME_FID, xmem_fid, - VOPNAME_RWLOCK, xmem_rwlock, - VOPNAME_RWUNLOCK, (fs_generic_func_p) xmem_rwunlock, - VOPNAME_SEEK, xmem_seek, - VOPNAME_SPACE, xmem_space, - VOPNAME_GETPAGE, xmem_getpage, - VOPNAME_PUTPAGE, xmem_putpage, - VOPNAME_MAP, (fs_generic_func_p) xmem_map, - VOPNAME_ADDMAP, (fs_generic_func_p) xmem_addmap, - VOPNAME_DELMAP, xmem_delmap, - NULL, NULL -};
--- a/usr/src/uts/intel/sys/Makefile Thu Jan 18 14:25:26 2007 -0800 +++ b/usr/src/uts/intel/sys/Makefile Thu Jan 18 16:23:02 2007 -0800 @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -99,10 +99,6 @@ SUBHDRS = \ dktp/mscsi.h -FSHDRS = \ - seg_xmem.h \ - xmem.h - ROOTDIR= $(ROOT)/usr/include/sys SCSIDIR= $(ROOTDIR)/scsi SCSIDIRS= $(SCSIDIR) $(SCSIDIR)/conf $(SCSIDIR)/generic \ @@ -115,12 +111,9 @@ ROOTHDRS= $(HDRS:%=$(ROOTDIR)/%) $(SUBHDRS:%=$(ROOTDIR)/%) $(CLOSED_BUILD)ROOTHDRS += $(CLOSEDHDRS:%=$(ROOTDIR)/%) -ROOTFSHDRS= $(FSHDRS:%=$(ROOTDIR)/fs/%) - CHECKHDRS= \ $(HDRS:%.h=%.check) \ - $(SUBHDRS:%.h=%.check) \ - $(FSHDRS:%.h=fs/%.check) + $(SUBHDRS:%.h=%.check) $(CLOSED_BUILD)CHECKHDRS += $(CLOSEDHDRS:%.h=$(CLOSED)/uts/intel/sys/%.check) @@ -133,9 +126,9 @@ .KEEP_STATE: -.PARALLEL: $(CHECKHDRS) $(ROOTHDRS) $(ROOTFSHDRS) +.PARALLEL: $(CHECKHDRS) $(ROOTHDRS) -install_h: $(ROOTDIRS) .WAIT $(ROOTHDRS) $(ROOTFSHDRS) +install_h: $(ROOTDIRS) .WAIT $(ROOTHDRS) $(ROOTDIRS): $(INS.dir)
--- a/usr/src/uts/intel/sys/fs/seg_xmem.h Thu Jan 18 14:25:26 2007 -0800
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,108 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_FS_SEG_XMEM_H
-#define	_SYS_FS_SEG_XMEM_H
-
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
-#include <sys/map.h>
-
-#ifdef	__cplusplus
-extern "C" {
-#endif
-
-/*
- * Statistics for segxmem operations, kept as named kstats.
- *
- * No explicit locking protects these stats, so counts may be approximate.
- */
-struct segxmemcnt {
-	kstat_named_t	sx_fault;	/* number of segxmem_faults */
-	kstat_named_t	sx_getmapflt;	/* number of segxmem_getmaps */
-	kstat_named_t	sx_release;	/* number of releases */
-	kstat_named_t	sx_pagecreate;	/* number of pagecreates */
-};
-
-
-#if	defined(_KERNEL)
-/* Creation arguments passed to segxmem_create() through as_map(). */
-struct segxmem_crargs {
-	struct	vnode	*xma_vp;	/* vnode mapped from */
-	u_offset_t	xma_offset;	/* starting offset for mapping */
-	/* offset above could be invalid due to remaps, but ppa will be valid */
-	page_t		***xma_ppa;	/* page list for this mapping */
-	uint_t		xma_bshift;	/* for converting offset to block # */
-	struct	cred	*xma_cred;	/* credentials */
-	uchar_t		xma_prot;	/* protections requested by the mapper */
-};
-
-/* One run of pages with changed protections; chained from sxd_spc. */
-struct sx_prot {
-	struct sx_prot	*spc_next;	/* Next such one */
-	pgcnt_t		spc_pageindex;	/* First page with changed prot */
-	pgcnt_t		spc_numpages;	/* & number of such pages */
-	uchar_t		spc_prot;	/* presumably the prot for that run */
-};
-
-struct segxmem_data {
-	struct	vnode	*sxd_vp;	/* vnode for this mapping */
-	offset_t	sxd_offset;	/* & initial offset */
-	/*
-	 * The above may not be valid after remap, but ppa below will track
-	 * the remaps.
-	 */
-	size_t		sxd_bsize;	/* block size */
-	uint_t		sxd_bshift;	/* for converting offset to block # */
-	size_t		sxd_softlockcnt; /* NOTE(review): softlock count? */
-	struct sx_prot	*sxd_spc;	/* linked list of changed protections */
-	uchar_t		sxd_prot;	/* NOTE(review): segment-wide prot? */
-};
-/* Number of blocks covered by seg, rounded up to a whole block. */
-#define	sx_blocks(seg, sxd)	howmany((seg)->s_size, 1 << (sxd)->sxd_bshift)
-
-/*
- * Public seg_xmem segment operations.
- */
-extern int	segxmem_create(struct seg *, struct segxmem_crargs *);
-/*
- * extern faultcode_t segxmem_fault(struct hat *, struct seg *, caddr_t, size_t,
- *	enum fault_type, enum seg_rw);
- */
-extern caddr_t	segxmem_getmap(struct map *, struct vnode *, u_offset_t,
-			size_t, page_t **, enum seg_rw);
-extern void	segxmem_release(struct map *, caddr_t, size_t);
-extern int	segxmem_remap(struct seg *, struct vnode *vp, caddr_t, size_t,
-			page_t ***, uchar_t);
-extern void	segxmem_inval(struct seg *, struct vnode *, u_offset_t);
-
-#endif	/* _KERNEL */
-
-#ifdef	__cplusplus
-}
-#endif
-
-#endif	/* _SYS_FS_SEG_XMEM_H */
--- a/usr/src/uts/intel/sys/fs/xmem.h Thu Jan 18 14:25:26 2007 -0800
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,282 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef	_SYS_FS_XMEM_H
-#define	_SYS_FS_XMEM_H
-
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
-#include <sys/t_lock.h>
-#include <vm/seg.h>
-#include <vm/seg_vn.h>
-#include <sys/fs/seg_xmem.h>
-
-#ifdef	__cplusplus
-extern "C" {
-#endif
-
-#ifdef _KERNEL
-
-/*
- * xmemnode is the file system dependent node for xmemfs.
- *
- *	xn_rwlock protects access of the directory list at xn_dir
- *	as well as synchronizing reads and writes to the xmemnode
- *
- *	xn_contents protects growing, shrinking, reading and writing
- *	the file along with xn_rwlock (see below).
- *
- *	xn_tlock protects updates to xn_mode and xn_nlink
- *
- *	xm_contents in the xmount filesystem data structure protects
- *	xn_forw and xn_back which are used to maintain a linked
- *	list of all xmemfs files associated with that file system
- *
- *	XXX - valid ? The pp array represents the store for xmemfs.
- *	To grow or shrink the file or fill in holes requires
- *	manipulation of the pp array. These operations are protected
- *	by a combination of xn_rwlock and xn_contents. Growing or shrinking
- *	the array requires the write lock on xn_rwlock and xn_contents.
- *	Filling in a slot in the array requires the write lock on xn_contents.
- *	Reading the array requires the read lock on xn_contents.
- *
- *	The ordering of the locking is:
- *	xn_rwlock -> xn_contents -> page locks on pages in file
- *
- *	xn_tlock doesn't require any xmemnode locks
- */
-
-struct xmemnode {
-	struct xmemnode	*xn_back;		/* linked list of xmemnodes */
-	struct xmemnode	*xn_forw;		/* linked list of xmemnodes */
-	union {
-		struct {
-			struct xdirent	*un_dirlist; /* dirent list */
-			uint_t	un_dirents;	/* number of dirents */
-		} un_dirstruct;
-		char		*un_symlink;	/* pointer to symlink */
-		struct {
-			page_t	***un_ppa;	/* page backing for file */
-			size_t	un_size;	/* size repres. by array */
-		} un_ppstruct;
-	} un_xmemnode;
-	struct vnode	*xn_vnode;		/* vnode for this xmemnode */
-	int		xn_gen;			/* pseudo gen number for xfid */
-	struct vattr	xn_attr;		/* attributes */
-	krwlock_t	xn_contents;		/* vm side -serialize mods */
-	krwlock_t	xn_rwlock;		/* rw,trunc size - serialize */
-						/* mods and directory updates */
-	kmutex_t	xn_tlock;		/* time, flag, and nlink lock */
-};
-
-/*
- * each xn_ppa[] entry points to an array of page_t pointers.
- */
-#define	xn_ppa		un_xmemnode.un_ppstruct.un_ppa
-#define	xn_ppasz	un_xmemnode.un_ppstruct.un_size
-#define	xn_dir		un_xmemnode.un_dirstruct.un_dirlist
-#define	xn_dirents	un_xmemnode.un_dirstruct.un_dirents
-#define	xn_symlink	un_xmemnode.un_symlink
-
-/*
- * Attributes
- */
-#define	xn_mask		xn_attr.va_mask
-#define	xn_type		xn_attr.va_type
-#define	xn_mode		xn_attr.va_mode
-#define	xn_uid		xn_attr.va_uid
-#define	xn_gid		xn_attr.va_gid
-#define	xn_fsid		xn_attr.va_fsid
-#define	xn_nodeid	xn_attr.va_nodeid
-#define	xn_nlink	xn_attr.va_nlink
-#define	xn_size		xn_attr.va_size
-#define	xn_atime	xn_attr.va_atime
-#define	xn_mtime	xn_attr.va_mtime
-#define	xn_ctime	xn_attr.va_ctime
-#define	xn_rdev		xn_attr.va_rdev
-#define	xn_blksize	xn_attr.va_blksize
-#define	xn_nblocks	xn_attr.va_nblocks
-#define	xn_seq		xn_attr.va_seq
-
-/*
- * xmemfs directories are made up of a linked list of xdirent structures
- * hanging off directory xmemnodes. File names are not fixed length,
- * but are null terminated.
- */
-struct xdirent {
-	struct xmemnode	*xd_xmemnode;		/* xmemnode for this file */
-	struct xdirent	*xd_next;		/* next directory entry */
-	struct xdirent	*xd_prev;		/* prev directory entry */
-	uint_t		xd_offset;		/* "offset" of dir entry */
-	uint_t		xd_hash;		/* a hash of xd_name */
-	struct xdirent	*xd_link;		/* linked via the hash table */
-	struct xmemnode	*xd_parent;		/* parent, dir we are in */
-	char		*xd_name;		/* must be null terminated */
-						/* max length is MAXNAMELEN */
-};
-
-/*
- * xfid overlays the fid structure (for VFS_VGET)
- */
-struct xfid {
-	uint16_t	xfid_len;
-	ino32_t		xfid_ino;
-	int32_t		xfid_gen;
-};
-
-#define	ESAME	(-1)		/* trying to rename linked files (special) */
-
-extern struct vnodeops *xmem_vnodeops;
-extern const struct fs_operation_def xmem_vnodeops_template[];
-
-/*
- * xmemfs per-mount data structure.
- *
- * All fields are protected by xm_contents.
- * File renames on a particular file system are protected by xm_renamelck.
- */
-struct xmount {
-	struct vfs	*xm_vfsp;	/* filesystem's vfs struct */
-	struct xmemnode	*xm_rootnode;	/* root xmemnode */
-	char 		*xm_mntpath;	/* name of xmemfs mount point */
-	uint_t		xm_flags;	/* Miscellaneous Flags */
-	size_t		xm_bsize;	/* block size for this file system */
-	uint_t		xm_bshift;	/* for converting offset to block # */
-	pgcnt_t		xm_ppb;		/* pages per block */
-	struct map	*xm_map;	/* Map for kernel addresses */
-	caddr_t		xm_mapaddr;	/* Base of above map */
-	size_t		xm_mapsize;	/* size of above map */
-	caddr_t		xm_vmmapaddr;	/* Base of heap for above map */
-	size_t		xm_vmmapsize;	/* size of heap for above map */
-	ulong_t		xm_max;		/* file system max reservation */
-	pgcnt_t		xm_mem;		/* pages of reserved memory */
-	dev_t		xm_dev;		/* unique dev # of mounted `device' */
-	uint_t		xm_gen;		/* pseudo generation number for files */
-	kmutex_t	xm_contents;	/* lock for xmount structure */
-	kmutex_t	xm_renamelck;	/* rename lock for this mount */
-	uint_t		xm_xpgcnt;	/* index and count for xpg_array */
-	void		**xm_xpgarray;	/* array of pointers */
-};
-/* XMEMPRINTF(level, (fmt, ...)): DEBUG-only trace when level >= xmemlevel. */
-#ifndef	DEBUG
-#define	XMEMPRINTF(level, args)
-#else
-extern int	xmemlevel;
-/*PRINTFLIKE1*/
-extern void	xmemprintf(const char *, ...) __KPRINTFLIKE(1);
-#define	XMEMPRINTF(level, args)	\
-	do { if ((level) >= xmemlevel) xmemprintf args; } while (0)
-#endif
-
-#endif	/* _KERNEL */
-
-#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
-#pragma pack(4)
-#endif
-
-/*
- * Make sizeof struct xmemfs_args the same on x86 and amd64.
- */
-
-struct xmemfs_args {
-	offset_t	xa_fssize;	/* file system size in bytes */
-	offset_t	xa_bsize;	/* block size for this file system */
-	uint_t		xa_flags;	/* flags for this mount */
-};
-
-#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
-#pragma pack()
-#endif
-
-/* Flag bits */
-#define	XARGS_RESERVEMEM	1	/* pre reserve memory */
-#define	XARGS_LARGEPAGES	2	/* Use large pages */
-
-#ifdef	_KERNEL
-
-/*
- * File system independent to xmemfs conversion macros
- */
-#define	VFSTOXM(vfsp)		((struct xmount *)(vfsp)->vfs_data)
-#define	VTOXM(vp)		((struct xmount *)(vp)->v_vfsp->vfs_data)
-#define	VTOXN(vp)		((struct xmemnode *)(vp)->v_data)
-#define	XNTOV(xp)		((xp)->xn_vnode)
-#define	xmemnode_hold(tp)	VN_HOLD(XNTOV(tp))
-#define	xmemnode_rele(tp)	VN_RELE(XNTOV(tp))
-
-/*
- * enums
- */
-enum de_op { DE_CREATE, DE_MKDIR, DE_LINK, DE_RENAME }; /* direnter ops */
-enum dr_op { DR_REMOVE, DR_RMDIR, DR_RENAME };	/* dirremove ops */
-
-/*
- * xmemfs_minfree is the amount (in pages) of memory that xmemfs
- * leaves free for the rest of the system.
- * NB: If xmemfs allocates too much space, other processes will be
- * unable to execute. 320 is chosen arbitrarily to be about right for
- * an RDBMS environment with all of its buffers coming from xmemfs.
- */
-#define	XMEMMINFREE	(320 * 1024 * 1024)	/* 320 Megabytes */
-/*
- * number of simultaneous reads/writes is limited by NUM_SIMULMAPS
- * below. We cannot set it much higher as we expect typical block
- * size to be 2MB or 4MB and we cannot afford to reserve and keep
- * too much kernel virtual memory for ourselves.
- */
-#define	SEGXMEM_NUM_SIMULMAPS	4
-
-extern pgcnt_t	xmemfs_minfree;		/* memory in pages */
-
-extern	void	xmemnode_init(struct xmount *, struct xmemnode *,
-	struct vattr *, struct cred *);
-extern	int	xmemnode_trunc(struct xmount *, struct xmemnode *, u_offset_t);
-extern	int	xdirlookup(struct xmemnode *, char *, struct xmemnode **,
-	struct cred *);
-extern	int	xdirdelete(struct xmemnode *, struct xmemnode *, char *,
-	enum dr_op, struct cred *);
-extern	void	xdirinit(struct xmemnode *, struct xmemnode *);
-extern	void	xdirtrunc(struct xmemnode *);
-extern	void	*xmem_memalloc(size_t, int);
-extern	void	xmem_memfree(void *, size_t);
-extern	int	xmem_xaccess(void *, int, struct cred *);
-extern	int	xdirenter(struct xmount *, struct xmemnode *, char *,
-	enum de_op, struct xmemnode *, struct xmemnode *, struct vattr *,
-	struct xmemnode **, struct cred *);
-extern	int	xmem_fillpages(struct xmemnode *, struct vnode *, offset_t,
-	offset_t, int);
-extern	int	xmem_sticky_remove_access(struct xmemnode *, struct xmemnode *,
-	struct cred *);
-
-#endif	/* _KERNEL */
-
-#define	XMEM_MUSTHAVE	1
-
-#ifdef	__cplusplus
-}
-#endif
-
-#endif	/* _SYS_FS_XMEM_H */
--- a/usr/src/uts/intel/xmemfs/Makefile Thu Jan 18 14:25:26 2007 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,94 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# uts/intel/xmemfs/Makefile -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -#ident "%Z%%M% %I% %E% SMI" -# -# This makefile drives the production of the xmemfs file system -# kernel module. -# -# x86 architecture dependent -# - -# -# Path to the base of the uts directory tree (usually /usr/src/uts). -# -UTSBASE = ../.. - -# -# Define the module and object file sets. -# -MODULE = xmemfs -OBJECTS = $(XMEMFS_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(XMEMFS_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(USR_FS_DIR)/$(MODULE) - -INC_PATH += -I../../i86pc - -# -# Include common rules. -# -include $(UTSBASE)/intel/Makefile.intel - -# -# Define targets -# -ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint -INSTALL_TARGET = $(BINARY) $(ROOTMODULE) - -# -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. 
-# -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN -LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW -LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV - -# -# Default build targets. -# -.KEEP_STATE: - -def: $(DEF_DEPS) - -all: $(ALL_DEPS) - -clean: $(CLEAN_DEPS) - -clobber: $(CLOBBER_DEPS) - -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - -install: $(INSTALL_DEPS) - -# -# Include common targets. -# -include $(UTSBASE)/intel/Makefile.targ