view usr/src/uts/common/fs/objfs/objfs_data.c @ 3898:c788126f2a20

PSARC/2007/124 Strong Type-Checking for VFS Operation Registration Mechanism 6505923 Need better type checking for vnodeops 6531594 lxpr_readlink() is missing the "cred_t *cr" arg 6532559 vfs_strayops does not use the vnode/vfs operation registration mechanism
author rsb
date Mon, 26 Mar 2007 17:41:06 -0700
parents 68f95e015346
children 3047ad28a67b
line wrap: on
line source

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <fs/fs_subr.h>

#include <sys/elf.h>
#include <sys/errno.h>
#include <sys/file.h>
#include <sys/kmem.h>
#include <sys/kobj.h>
#include <sys/objfs.h>
#include <sys/objfs_impl.h>
#include <sys/stat.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/vfs_opreg.h>

/*
 * /system/object/<obj>/object
 *
 * This is an ELF file that contains information about data stored in the
 * kernel.  We use a special ELF file type, ET_SUNWPSEUDO, so that we can
 * control which fields and sections have meaning.  The file contains the
 * following sections:
 *
 * 	.shstrtab	Section header string table
 * 	.SUNW_ctf	CTF data
 * 	.symtab		Symbol table
 * 	.strtab		String table
 * 	.text		Text
 * 	.data		Data
 * 	.bss		BSS
 * 	.filename	Filename of module
 * 	.info		Private module info structure
 *
 * The .text, .data, and .bss sections are all marked SHT_NOBITS, and the data
 * is not actually exported in the file for security reasons.  The section
 * headers do contain the address and size of the sections, which is needed by
 * DTrace.  The CTF data, symbol table, and string table are present only if
 * they exist in the kernel.
 */

/*
 * Kind of each entry in data_sections[].  The kind selects how sect_addr()
 * and sect_size() locate the section's contents.
 */
typedef enum {
	SECT_TYPE_DATA,		/* addr and size are fields of struct module */
	SECT_TYPE_SHSTRTAB,	/* buffer built by objfs_data_init() */
	SECT_TYPE_DUMMY,	/* initial, empty section */
	SECT_TYPE_SYMTAB,	/* module symbol table */
	SECT_TYPE_STRTAB,	/* module string table */
	SECT_TYPE_FILENAME,	/* module filename string */
	SECT_TYPE_INFO		/* per-node objfs_info_t */
} sect_type_t;

/*
 * Static description of one section of the synthesized ELF file.  Most
 * members are copied into the section header by objfs_data_read().
 */
typedef struct section_desc {
	sect_type_t	sect_id;	/* kind of section, see sect_type_t */
	const char	*sect_name;	/* name stored in .shstrtab */
	/*
	 * For SECT_TYPE_DATA, SECT_TYPE_SYMTAB and SECT_TYPE_STRTAB this is
	 * the offset within struct module of the pointer to the data; for
	 * SECT_TYPE_SHSTRTAB it is the address of the string buffer itself.
	 */
	uintptr_t	sect_addr;
	/*
	 * For SECT_TYPE_DATA this is the offset within struct module of the
	 * size field; for SECT_TYPE_SHSTRTAB it is the buffer length.
	 */
	size_t		sect_size;
	int		sect_type;	/* reported as sh_type */
	int		sect_flags;	/* reported as sh_flags */
	size_t		sect_str;	/* offset of name in .shstrtab (sh_name) */
	int		sect_link;	/* data_sections[] index for sh_link */
	int		sect_entsize;	/* reported as sh_entsize */
	int		sect_align;	/* sh_addralign; also pads file data */
} section_desc_t;

/*
 * Initializers for section_desc_t.  Each macro expands to a positional
 * initializer whose members appear in declaration order: sect_id, sect_name,
 * sect_addr, sect_size, sect_type, sect_flags, sect_str, sect_link,
 * sect_entsize, sect_align.
 *
 * For data sections, 'addr' and 'size' name the members of struct module
 * that hold the address and size of the section; the offsets of those members
 * are what is stored in the descriptor.
 */
#define	SECT_DATA(name, addr, size, type, flags, align) \
	{ SECT_TYPE_DATA, name, offsetof(struct module, addr), \
	offsetof(struct module, size), type, flags, 0, 0, 0, align }

/*
 * The dummy section is the initial section of the file.  It is put into this
 * array only for convenience when reading the file.
 */
#define	SECT_DUMMY	{ SECT_TYPE_DUMMY, "", 0, 0, 0, 0, 0, 0, 0, 0 }

/*
 * The sizes of the symbol table and string table are not immediately
 * available as offsets into the module struct (sect_size() reads them from
 * the module's symhdr and strhdr instead), so we have to create individual
 * types for each.  The symbol table entry size and alignment follow the
 * native ELF class.
 */
#ifdef _LP64
#define	SECT_SYMTAB(name, type, flags) \
	{ SECT_TYPE_SYMTAB, name, offsetof(struct module, symtbl), 0, type, \
	flags, 0, 0, sizeof (Elf64_Sym), sizeof (uint64_t) }
#else
#define	SECT_SYMTAB(name, type, flags) \
	{ SECT_TYPE_SYMTAB, name, offsetof(struct module, symtbl), 0, type, \
	flags, 0, 0, sizeof (Elf32_Sym), sizeof (uint32_t) }
#endif
#define	SECT_STRTAB(name, type, flags) \
	{ SECT_TYPE_STRTAB, name, offsetof(struct module, strings), 0, type, \
	flags, 0, 0, 0, 1 }

/*
 * The .shstrtab section is constructed by objfs_data_init(), which fills in
 * the descriptor's address and size.
 */
#define	SECT_SHSTRTAB(name, type, flags) \
	{ SECT_TYPE_SHSTRTAB, name, 0, 0, type, flags, 0, 0, 0, 1 }

/*
 * Generic module information (objfs_info_t).  The contents are read from the
 * data node rather than from the module.
 */
#define	SECT_INFO	\
	{ SECT_TYPE_INFO, ".info", 0, 0, SHT_PROGBITS, 0, 0, 0, 0, \
	sizeof (uint32_t) }

/*
 * Filename section.  The contents are the module's filename string,
 * including the terminating NUL.
 */
#define	SECT_FILENAME	\
	{ SECT_TYPE_FILENAME, ".filename", 0, 0, SHT_PROGBITS, 0, 0, 0, 0, 1 }

/*
 * Every section the file may contain, in file order.  Positions in this
 * array are the "generic" section indices stored in sect_link; sections
 * that are not valid for a particular module are omitted from the file, so
 * objfs_data_read() translates generic indices into per-module ones.  The
 * array is not const because objfs_data_init() fills in sect_str, sect_link
 * and the .shstrtab address and size.
 */
static section_desc_t data_sections[] = {
	SECT_DUMMY,
	SECT_SHSTRTAB(".shstrtab",
	    SHT_STRTAB, SHF_STRINGS),
	SECT_DATA(".SUNW_ctf", ctfdata, ctfsize,
	    SHT_PROGBITS, 0, sizeof (uint64_t)),
	SECT_SYMTAB(".symtab", SHT_SYMTAB, 0),
	SECT_STRTAB(".strtab", SHT_STRTAB, SHF_STRINGS),
	SECT_DATA(".text", text, text_size,
	    SHT_NOBITS, SHF_ALLOC | SHF_EXECINSTR, 0),
	SECT_DATA(".data", data, data_size,
	    SHT_NOBITS, SHF_WRITE | SHF_ALLOC, 0),
	SECT_DATA(".bss", bss, bss_size,
	    SHT_NOBITS, SHF_WRITE | SHF_ALLOC, 0),
	SECT_INFO,
	SECT_FILENAME
};

/*
 * Number of entries in data_sections[].
 */
#define	NSECTIONS	\
	(sizeof (data_sections) / sizeof (section_desc_t))

/*
 * File offset of the section header with the given (per-module) index.  The
 * section header table immediately follows the ELF header, so
 * SECTION_OFFSET(0) is also the size of the ELF header.
 */
#ifdef _LP64
#define	SECTION_OFFSET(section)	\
	(sizeof (Elf64_Ehdr) + (section) * sizeof (Elf64_Shdr))
#else
#define	SECTION_OFFSET(section)	\
	(sizeof (Elf32_Ehdr) + (section) * sizeof (Elf32_Shdr))
#endif

/*
 * Take a hold on the module behind the data node 'vp' and return its struct
 * module.  The hold is dropped again and NULL is returned when the module is
 * not currently loaded, or when its generation count is newer than the one
 * recorded in the node (i.e. it was re-loaded after the node was created).
 * After a successful return the caller must call objfs_data_unlock().
 */
struct module *
objfs_data_lock(vnode_t *vp)
{
	objfs_datanode_t *dnode = vp->v_data;
	objfs_odirnode_t *odir = gfs_file_parent(vp)->v_data;
	struct modctl *modctl = odir->objfs_odir_modctl;

	(void) mod_hold_by_modctl(modctl,
	    MOD_WAIT_FOREVER | MOD_LOCK_NOT_HELD);

	if (modctl->mod_mp != NULL &&
	    dnode->objfs_data_gencount >= modctl->mod_gencount)
		return (modctl->mod_mp);

	mod_release_mod(modctl);
	return (NULL);
}

/*
 * Release the module hold obtained by a successful objfs_data_lock(vp).
 */
void
objfs_data_unlock(vnode_t *vp)
{
	vnode_t *parent = gfs_file_parent(vp);
	objfs_odirnode_t *odir = parent->v_data;

	mod_release_mod(odir->objfs_odir_modctl);
}


/*
 * Called when the filesystem is first loaded.  Creates and initializes the
 * section header string table, and fills in the sect_str and sect_link
 * members of the section descriptors.  This information could be encoded at
 * compile-time, but this way keeps the code more maintainable, as we don't
 * have to worry about duplicating information.
 *
 * The string table is allocated with KM_SLEEP and is never freed by this
 * function; its address and length are stored in the .shstrtab descriptor.
 */
void
objfs_data_init(void)
{
	int i;
	/*
	 * Indices of the sections other sections refer to.  They default to
	 * 0, the index of the dummy section, so that a name missing from
	 * data_sections[] yields a well-defined "no link" value rather than
	 * an indeterminate array index.
	 */
	int shstrtab = 0, strtab = 0, symtab = 0;
	size_t len = 0;
	section_desc_t *sect;
	char *strdata;

	/*
	 * First pass: validate the table, total up the space needed for all
	 * section names, and remember where the linked-to sections live.
	 */
	for (i = 0; i < NSECTIONS; i++) {
		sect = &data_sections[i];

		ASSERT(sect->sect_align == 0 || ISP2(sect->sect_align));
		ASSERT(sect->sect_align <= sizeof (uint64_t));

		len += strlen(sect->sect_name) + 1;
		if (strcmp(sect->sect_name, ".shstrtab") == 0)
			shstrtab = i;
		else if (strcmp(sect->sect_name, ".symtab") == 0)
			symtab = i;
		else if (strcmp(sect->sect_name, ".strtab") == 0)
			strtab = i;
	}

	/* The string table descriptor must exist to receive the buffer. */
	ASSERT(shstrtab != 0);

	strdata = kmem_zalloc(len, KM_SLEEP);
	sect = &data_sections[shstrtab];
	sect->sect_addr = (uintptr_t)strdata;
	sect->sect_size = len;

	/*
	 * Second pass: lay the names out back to back (each with its
	 * terminating NUL), record each name's offset, and set up the
	 * inter-section links.
	 */
	len = 0;
	for (i = 0; i < NSECTIONS; i++) {
		size_t namelen;

		sect = &data_sections[i];
		namelen = strlen(sect->sect_name) + 1;

		sect->sect_str = len;
		bcopy(sect->sect_name, strdata + len, namelen);
		len += namelen;

		if (strcmp(sect->sect_name, ".SUNW_ctf") == 0)
			sect->sect_link = symtab;
		else if (strcmp(sect->sect_name, ".symtab") == 0)
			sect->sect_link = strtab;
	}
}

/*
 * Given a section descriptor and module pointer, return the address of the
 * section's data, or 0 if the section has none.
 *
 * For SECT_TYPE_DATA, SECT_TYPE_SYMTAB and SECT_TYPE_STRTAB the descriptor
 * holds the offset of a pointer within struct module, which is read here.
 * SECT_TYPE_INFO returns a nonzero placeholder only so that the section is
 * considered valid; the actual data comes from the data node.  An
 * unrecognized section kind yields 0.
 */
static uintptr_t
sect_addr(section_desc_t *sp, struct module *mp)
{
	uintptr_t addr = 0;

	switch (sp->sect_id) {
	case SECT_TYPE_DUMMY:
		addr = 0;
		break;

	case SECT_TYPE_SHSTRTAB:
		addr = sp->sect_addr;
		break;

	case SECT_TYPE_STRTAB:
	case SECT_TYPE_SYMTAB:
	case SECT_TYPE_DATA:
		addr = *((uintptr_t *)((char *)mp + sp->sect_addr));
		break;

	case SECT_TYPE_FILENAME:
		addr = (uintptr_t)mp->filename;
		break;

	case SECT_TYPE_INFO:
		addr = 1;	/* This can be anything nonzero */
		break;

	default:
		/* Unknown kind: report "no data" instead of garbage. */
		addr = 0;
		break;
	}

	return (addr);
}

/*
 * Given a section descriptor and module pointer, return the size of the data
 * in bytes.
 *
 * For SECT_TYPE_DATA the descriptor holds the offset of the size field
 * within struct module.  The symbol and string table sizes come from the
 * module's saved section headers.  The filename section includes the
 * terminating NUL, and is empty when the module has no filename.  An
 * unrecognized section kind yields 0.
 */
static size_t
sect_size(section_desc_t *sp, struct module *mp)
{
	size_t size = 0;

	switch (sp->sect_id) {
	case SECT_TYPE_DUMMY:
		size = 0;
		break;

	case SECT_TYPE_SHSTRTAB:
		size = sp->sect_size;
		break;

	case SECT_TYPE_DATA:
		size = *((size_t *)((char *)mp + sp->sect_size));
		break;

	case SECT_TYPE_SYMTAB:
		size = mp->symhdr->sh_size;
		break;

	case SECT_TYPE_STRTAB:
		size = mp->strhdr->sh_size;
		break;

	case SECT_TYPE_INFO:
		size = sizeof (objfs_info_t);
		break;

	case SECT_TYPE_FILENAME:
		if (mp->filename == NULL)
			size = 0;
		else
			size = strlen(mp->filename) + 1;
		break;

	default:
		/* Unknown kind: report an empty section. */
		size = 0;
		break;
	}

	return (size);
}

/*
 * Decide whether a section appears in the file for the given module.  The
 * dummy section is always present; any other section is present exactly when
 * sect_addr() reports a nonzero address for it.  Returns 1 or 0.
 */
static int
sect_valid(section_desc_t *sp, struct module *mp)
{
	if (sp->sect_id == SECT_TYPE_DUMMY)
		return (1);

	return (sect_addr(sp, mp) != 0 ? 1 : 0);
}

/*
 * Compute the file offset at which the data of section 'sp' starts for
 * module 'mp'.  With sp == NULL every section is accounted for, giving the
 * total size of the file.  Returns 0 when 'sp' is not valid for 'mp'.
 *
 * The layout is: ELF header, one section header per valid section, then the
 * data of each valid non-SHT_NOBITS section in table order, each aligned to
 * its sect_align.  Note that 'mp' is handed to sect_valid() for every table
 * entry regardless of whether it is NULL.
 */
static size_t
data_offset(section_desc_t *sp, struct module *mp)
{
	section_desc_t *cur;
	section_desc_t *end = &data_sections[NSECTIONS];
	int nhdrs = 0;
	size_t pos;

	if (sp != NULL && mp != NULL && !sect_valid(sp, mp))
		return (0);

	/*
	 * The data area begins after the ELF header and the headers of all
	 * sections present for this module.
	 */
	for (cur = data_sections; cur < end; cur++) {
		if (sect_valid(cur, mp))
			nhdrs++;
	}
	pos = SECTION_OFFSET(nhdrs);

	/*
	 * Walk the sections in order, aligning and then skipping over the
	 * data of each one, until the requested section has been aligned.
	 */
	for (cur = data_sections; cur < end; cur++) {
		int has_data = sect_valid(cur, mp) &&
		    cur->sect_type != SHT_NOBITS;

		if (has_data && cur->sect_align > 1)
			pos = P2ROUNDUP(pos, cur->sect_align);

		if (cur == sp)
			return (pos);

		if (has_data)
			pos += sect_size(cur, mp);
	}

	return (pos);
}

/*
 * Given an index into the section table and a module pointer, returns the
 * data offset of the next section.
 */
static size_t
next_offset(int idx, struct module *mp)
{
	int i;

	for (i = idx + 1; i < NSECTIONS; i++) {
		if (sect_valid(&data_sections[i], mp))
			return (data_offset(&data_sections[i], mp));
	}

	return (data_offset(NULL, mp));
}

/*
 * Total size of the file for module 'mp': the offset just past the data of
 * the last section.
 */
static size_t
data_size(struct module *mp)
{
	size_t total = data_offset(NULL, mp);

	return (total);
}

/*
 * Returns the size needed for all the headers in the file.
 */
static size_t
header_size(void)
{
	return (data_offset(&data_sections[0], NULL));
}

/*
 * Create the data file vnode under the object directory 'pvp'.  The new
 * node records the module's current generation count, which
 * objfs_data_lock() later compares against, and whether the module is
 * primary.
 */
/* ARGSUSED */
vnode_t *
objfs_create_data(vnode_t *pvp)
{
	objfs_odirnode_t *onode = pvp->v_data;
	struct modctl *modctl;
	objfs_datanode_t *dnode;
	vnode_t *vp;

	vp = gfs_file_create(sizeof (objfs_datanode_t), pvp, objfs_ops_data);
	dnode = vp->v_data;
	modctl = onode->objfs_odir_modctl;

	dnode->objfs_data_gencount = modctl->mod_gencount;
	dnode->objfs_data_info.objfs_info_primary = modctl->mod_prim;

	return (vp);
}

/*
 * Getattr vnode operation for the data file.  Reports a read-only regular
 * file with a single link whose size is the full size of the synthesized
 * file and whose timestamps are all the current time.  Returns EIO if the
 * module can no longer be locked, 0 otherwise.
 */
/* ARGSUSED */
static int
objfs_data_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
{
	timestruc_t now;
	struct module *mp = objfs_data_lock(vp);

	if (mp == NULL)
		return (EIO);

	gethrestime(&now);

	vap->va_type = VREG;
	vap->va_mode = S_IRUSR | S_IRGRP | S_IROTH;
	vap->va_nlink = 1;
	vap->va_nodeid = gfs_file_inode(vp);
	vap->va_size = data_size(mp);
	vap->va_mtime = now;
	vap->va_ctime = now;
	vap->va_atime = now;

	(void) objfs_common_getattr(vp, vap);
	objfs_data_unlock(vp);

	return (0);
}

/*
 * Access vnode operation: any request that includes write or execute
 * permission is refused with EACCES; everything else is allowed.
 */
/* ARGSUSED */
static int
objfs_data_access(vnode_t *vp, int mode, int flags, cred_t *cr)
{
	return ((mode & (VWRITE|VEXEC)) != 0 ? EACCES : 0);
}

/*
 * Open vnode operation: opening for writing fails with EINVAL; any other
 * open succeeds.
 */
/* ARGSUSED */
int
objfs_data_open(vnode_t **cpp, int flag, cred_t *cr)
{
	return ((flag & FWRITE) != 0 ? EINVAL : 0);
}

/*
 * Copy symbols out of the table at 'addr' ('size' bytes long), starting at
 * byte 'offset' within the table, until the table or the caller's buffer is
 * exhausted.  Each symbol is copied to a local first so that its st_shndx can
 * be rewritten to SHN_ABS (undefined symbols are left alone) without
 * modifying the kernel's table.  Returns 0 or the error from uiomove().
 */
static int
read_symtab(void *addr, size_t size, off_t offset, uio_t *uio)
{
#ifdef _LP64
	Elf64_Sym ent, *table;
#else
	Elf32_Sym ent, *table;
#endif
	off_t idx, skip;
	int err;

	table = addr;
	idx = offset / sizeof (ent);
	skip = offset % sizeof (ent);

	if (skip != 0) {
		/*
		 * The read begins part-way through a symbol: emit only the
		 * tail of that symbol, then carry on with whole ones.
		 */
		ent = table[idx];
		if (ent.st_shndx != SHN_UNDEF)
			ent.st_shndx = SHN_ABS;

		err = uiomove((char *)&ent + skip, sizeof (ent) - skip,
		    UIO_READ, uio);
		if (err != 0 || uio->uio_resid <= 0)
			return (err);

		idx++;
	}

	ASSERT(size % sizeof (ent) == 0);

	while (idx < size / sizeof (ent)) {
		ent = table[idx];
		if (ent.st_shndx != SHN_UNDEF)
			ent.st_shndx = SHN_ABS;

		err = uiomove((char *)&ent, sizeof (ent), UIO_READ, uio);
		if (err != 0 || uio->uio_resid <= 0)
			return (err);

		idx++;
	}

	return (0);
}

/* ARGSUSED */
static int
objfs_data_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr,
    caller_context_t *ct)
{
	int error = 0;
	objfs_datanode_t *dnode = vp->v_data;
	struct module *mp;
	off_t off;
#ifdef _LP64
	Elf64_Shdr shdr;
#else
	Elf32_Shdr shdr;
#endif
	int i, j;
	section_desc_t *sp;
	void *addr;
	int transidx[NSECTIONS];

	if ((mp = objfs_data_lock(vp)) == NULL)
		return (ENOENT);

	if (uio->uio_resid <= 0 || uio->uio_offset >= data_size(mp))
		goto error;

	/*
	 * Construct an array to translate from a generic section header index
	 * to an index specific for this object.
	 */
	for (i = 0, j = 0; i < NSECTIONS; i++) {
		transidx[i] = j;
		if (sect_valid(&data_sections[i], mp))
			j++;

	}

	/*
	 * Check to see if we're in the Elf header
	 */
	if (uio->uio_loffset < SECTION_OFFSET(0)) {
#ifdef _LP64
		Elf64_Ehdr ehdr;
#else
		Elf32_Ehdr ehdr;
#endif

		bzero(&ehdr, sizeof (ehdr));

		bcopy(ELFMAG, ehdr.e_ident, SELFMAG);
#ifdef _BIG_ENDIAN
		ehdr.e_ident[EI_DATA] = ELFDATA2MSB;
#else
		ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
#endif
		ehdr.e_ident[EI_VERSION] = EV_CURRENT;

#ifdef _LP64
		ehdr.e_ident[EI_CLASS] = ELFCLASS64;
		ehdr.e_type = ELFCLASS64;
		ehdr.e_ehsize = sizeof (Elf64_Ehdr);
		ehdr.e_phentsize = sizeof (Elf64_Phdr);
		ehdr.e_shentsize = sizeof (Elf64_Shdr);
#else
		ehdr.e_ident[EI_CLASS] = ELFCLASS32;
		ehdr.e_type = ELFCLASS32;
		ehdr.e_ehsize = sizeof (Elf32_Ehdr);
		ehdr.e_phentsize = sizeof (Elf32_Phdr);
		ehdr.e_shentsize = sizeof (Elf32_Shdr);
#endif

#ifdef __sparc
#ifdef __sparcv9
		ehdr.e_machine = EM_SPARCV9;
#else
		ehdr.e_machine = EM_SPARC;
#endif
#elif defined(__amd64)
		ehdr.e_machine = EM_AMD64;
#else
		ehdr.e_machine = EM_386;
#endif

		ehdr.e_version = EV_CURRENT;
		ehdr.e_type = ET_SUNWPSEUDO;
		ehdr.e_shnum = 0;
		ehdr.e_shoff = SECTION_OFFSET(0);

		for (i = 0; i < NSECTIONS; i++) {
			if (strcmp(data_sections[i].sect_name,
			    ".shstrtab") == 0)
				ehdr.e_shstrndx = transidx[i];

			if (sect_valid(&data_sections[i], mp))
				ehdr.e_shnum++;
		}

		if ((error = uiomove((char *)&ehdr + uio->uio_loffset,
		    sizeof (ehdr) - uio->uio_loffset, UIO_READ, uio)) != 0 ||
		    uio->uio_resid <= 0)
			goto error;
	}

	/*
	 * Go through and construct section headers for each section.
	 */
	j = 0;
	for (i = 0; i < NSECTIONS; i++) {
		sp = &data_sections[i];

		if (!sect_valid(sp, mp))
			continue;

		if (uio->uio_loffset < SECTION_OFFSET(j+1)) {
			shdr.sh_link = transidx[sp->sect_link];
			shdr.sh_entsize = sp->sect_entsize;
			shdr.sh_info = 0;
			shdr.sh_name = sp->sect_str;
			shdr.sh_type = sp->sect_type;
			shdr.sh_flags = sp->sect_flags;
			shdr.sh_addr = sect_addr(sp, mp);
			shdr.sh_offset = data_offset(sp, mp);
			shdr.sh_size = sect_size(sp, mp);
			shdr.sh_addralign = sp->sect_align;

			off = uio->uio_loffset - SECTION_OFFSET(j);
			if ((error = uiomove((char *)&shdr + off,
			    sizeof (shdr) - off, UIO_READ, uio)) != 0 ||
			    uio->uio_resid <= 0)
				goto error;
		}

		j++;
	}

	/*
	 * Output the data for each section
	 */
	for (i = 0; i < NSECTIONS; i++) {
		size_t nextoff;
		sp = &data_sections[i];
		nextoff = next_offset(i, mp);
		if (sect_valid(sp, mp) && sp->sect_type != SHT_NOBITS &&
		    uio->uio_loffset < nextoff) {

			if (sp->sect_id == SECT_TYPE_INFO)
				addr = &dnode->objfs_data_info;
			else
				addr = (void *)sect_addr(sp, mp);
			off = uio->uio_loffset - data_offset(sp, mp);

			/*
			 * The symtab requires special processing to convert
			 * the st_shndx field to SHN_ABS.  Otherwise, simply
			 * copy the data in bulk.
			 */
			if (sp->sect_id == SECT_TYPE_SYMTAB)
				error = read_symtab(addr, sect_size(sp, mp),
				    off, uio);
			else
				error = uiomove((char *)addr + off,
				    sect_size(sp, mp) - off, UIO_READ, uio);

			if (error != 0 || uio->uio_resid <= 0)
				goto error;

			/*
			 * If the next section needs to be aligned, pad out with
			 * zeroes.
			 */
			if (uio->uio_loffset < nextoff) {
				uint64_t padding = 0;

				ASSERT(nextoff - uio->uio_loffset <
				    sizeof (uint64_t));

				if ((error = uiomove(&padding,
				    nextoff - uio->uio_loffset, UIO_READ,
				    uio)) != 0 || uio->uio_resid <= 0)
					goto error;

			}
		}
	}

error:
	objfs_data_unlock(vp);

	return (error);
}

/*
 * Seek vnode operation: reports success for every request without examining
 * or modifying the offsets.
 */
/* ARGSUSED */
static int
objfs_data_seek(vnode_t *vp, offset_t off, offset_t *offp)
{
	return (0);
}

/*
 * Vnode operation registration template for the data file.  Each entry pairs
 * an operation name with its handler through the matching typed member of
 * the initializer; ioctl is routed to fs_inval.  The table is terminated by
 * an entry whose name is NULL.
 */
const fs_operation_def_t objfs_tops_data[] = {
	{ VOPNAME_OPEN,		{ .vop_open = objfs_data_open } },
	{ VOPNAME_CLOSE,	{ .vop_close = objfs_common_close } },
	{ VOPNAME_IOCTL,	{ .error = fs_inval } },
	{ VOPNAME_GETATTR,	{ .vop_getattr = objfs_data_getattr } },
	{ VOPNAME_ACCESS,	{ .vop_access = objfs_data_access } },
	{ VOPNAME_INACTIVE,	{ .vop_inactive = gfs_vop_inactive } },
	{ VOPNAME_READ,		{ .vop_read = objfs_data_read } },
	{ VOPNAME_SEEK,		{ .vop_seek = objfs_data_seek } },
	{ VOPNAME_MAP,		{ .vop_map = gfs_vop_map } },
	{ NULL }
};