view usr/src/cmd/fm/modules/common/sw-diag-response/common/sw.h @ 12979:ab9ae749152f

PSARC/2009/617 Software Events Notification Parameters CLI PSARC/2009/618 snmp-notify: SNMP Notification Daemon for Software Events PSARC/2009/619 smtp-notify: Email Notification Daemon for Software Events PSARC/2010/225 fmd for non-global Solaris zones PSARC/2010/226 Solaris Instance UUID PSARC/2010/227 nvlist_nvflag(3NVPAIR) PSARC/2010/228 libfmevent additions PSARC/2010/257 sysevent_evc_setpropnvl and sysevent_evc_getpropnvl PSARC/2010/265 FMRI and FMA Event Stabilty, 'ireport' category 1 event class, and the 'sw' FMRI scheme PSARC/2010/278 FMA/SMF integration: instance state transitions PSARC/2010/279 Modelling panics within FMA PSARC/2010/290 logadm.conf upgrade 6392476 fmdump needs to pretty-print 6393375 userland ereport/ireport event generation interfaces 6445732 Add email notification agent for FMA and software events 6804168 RFE: Allow an efficient means to monitor SMF services status changes 6866661 scf_values_destroy(3SCF) will segfault if is passed NULL 6884709 Add snmp notification agent for FMA and software events 6884712 Add private interface to tap into libfmd_msg macro expansion capabilities 6897919 fmd to run in a non-global zone 6897937 fmd use of non-private doors is not safe 6900081 add a UUID to Solaris kernel image for use in crashdump identification 6914884 model panic events as a defect diagnosis in FMA 6944862 fmd_case_open_uuid, fmd_case_uuisresolved, fmd_nvl_create_defect 6944866 log legacy sysevents in fmd 6944867 enumerate svc scheme in topo 6944868 software-diagnosis and software-response fmd modules 6944870 model SMF maintenance state as a defect diagnosis in FMA 6944876 savecore runs in foreground for systems with zfs root and dedicated dump 6965796 Implement notification parameters for SMF state transitions and FMA events 6968287 SUN-FM-MIB.mib needs to be updated to reflect Oracle information 6972331 logadm.conf upgrade PSARC/2010/290
author Gavin Maltby <gavin.maltby@oracle.com>
date Fri, 30 Jul 2010 17:04:17 +1000
parents
children
line wrap: on
line source

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */

#ifndef	_SW_H
#define	_SW_H

/*
 * Headers are pulled in before the extern "C" block is opened so that they
 * are not processed inside this header's linkage specification when it is
 * included from C++.
 */
#include <sys/fm/protocol.h>
#include <fm/fmd_api.h>
#include <libnvpair.h>
#include <pthread.h>
#include <libuutil.h>

#ifdef	__cplusplus
extern "C" {
#endif

/*
 * We have two real fmd modules - software-diagnosis and software-response.
 * Each hosts a number of subsidiary diagnosis engines and response agents,
 * although these are not fmd modules as such (the intention is to avoid
 * a proliferation of small C diagnosis and response modules).
 *
 * Subsidiary "modules" are not loaded as normal fmd modules are.  Instead
 * each of the real modules software-diagnosis and software-response includes
 * an array listing the subsidiaries it hosts, and when the real module
 * is loaded by fmd it iterates over this list to "load" subsidiaries by
 * calling their nominated init function.
 */

/* Maximum number of subsidiary "modules" */
#define	SW_SUB_MAX	10

/*
 * Maximum number of supported timers across all subsidiaries.
 * NOTE(review): presumably the limit on timers installed through
 * sw_timer_install that may be outstanding at once - confirm against the
 * implementation.
 */
#define	SW_TIMER_MAX	20

/*
 * A subsidiary must perform fmd_hdl_subscribe calls for all events of
 * interest to it.  These are typically performed during its init
 * function.  All subscription callbacks funnel through the shared
 * fmdo_recv entry point; that function walks through the dispatch list
 * for each subsidiary and performs a callback for the first matching entry of
 * each subsidiary.  The init entry point for each subsidiary
 * returns a pointer to an array of struct sw_disp applicable for that
 * entity.
 *
 * Note that the framework does *not* perform any fmd_hdl_subscribe calls
 * on behalf of the subsidiary - the swd_classpat member below is used
 * in routing events, not in establishing subscriptions for them.  A
 * subsidiary can subscribe to say "ireport.foo.a" and "ireport.foo.b"
 * but could elect to nominate a common handler for those via a single
 * struct sw_disp with swd_classpat of "ireport.foo.*".
 */
/*
 * Type of a dispatch callback nominated in the swd_func member of a
 * struct sw_disp.  The trailing void * argument carries the opaque swd_arg
 * nominated alongside the callback.
 * NOTE(review): the leading arguments presumably mirror those of an fmdo_recv
 * entry point (module handle, event, event nvlist, event class string) -
 * confirm against the host module's dispatch code.
 */
typedef void sw_dispfunc_t(fmd_hdl_t *, fmd_event_t *, nvlist_t *,
    const char *, void *);

/*
 * One entry in a subsidiary's dispatch list.  An event whose class matches
 * swd_classpat is routed to swd_func.  The pattern is used for routing only;
 * it does not establish an fmd subscription for the subsidiary.
 */
struct sw_disp {
	const char *swd_classpat;	/* event classes to callback for */
	sw_dispfunc_t *swd_func;	/* callback function */
	void *swd_arg;			/* opaque argument to callback */
};

/*
 * A diagnosis or response subsidiary must provide a struct sw_subinfo with
 * all its pertinent information;  a pointer to this structure must be
 * included in the array of struct sw_subinfo pointers in each of
 * software-diagnosis and software-response.
 *
 * swsub_name
 *	This should be chosen to be unique to this subsidiary;
 *	by convention it should also be the name prefix used in any fmd
 *	buffers the subsidiary creates.
 *
 * swsub_casetype
 *	A diagnosis subsidiary solves cases using swde_case_* below, and it
 *	must specify in swsub_casetype the type of case it solves.  A response
 *	subsidiary must specify SW_CASE_NONE here.  A subsidiary may solve
 *	at most one type of case, and no two subsidiaries may solve the same
 *	case type.  We use the case type to identify the subsidiary owner of
 *	an fmd case that is really owned by the host module.
 *
 * swsub_init
 *	The initialization function for this subsidiary, akin to the
 *	_fmd_init in a traditional fmd module.  This must not be NULL.
 *
 *	 When the host diagnosis/response module initializes, its _fmd_init
 *	 entry point will call the swsub_init function for each subsidiary
 *	 in turn.  The fmd handle has already been registered and timers are
 *	 available for installation (see below).  The swsub_init function
 *	 returns one of the SW_SUB_INIT_* values;  it must also pass back,
 *	 through the struct sw_disp ** argument, a pointer to a
 *	 NULL-terminated array of struct sw_disp describing the event
 *	 dispatch preferences for that subsidiary, and fill the integer we
 *	 pass with the number of entries in that array (including the
 *	 terminating NULL entry).  The swsub_init function also receives
 *	 a subsidiary-unique id_t assigned by the framework that it should
 *	 keep a note of for use in timer installation (see below);  this id
 *	 should not be persisted to checkpoint data.
 *
 * swsub_fini
 *	When the host module _fmd_fini is called it will call this function
 *	for each subsidiary.  A subsidiary can specify NULL here.
 *
 * swsub_timeout
 *	This is the timeout function to call for expired timers installed by
 *	this subsidiary.  See sw_timer_{install,remove} below.  May be
 *	NULL if no timers are used by this subsidiary.
 *
 * swsub_case_close
 *	This function is called when a case "owned" by a subsidiary
 *	is the subject of an fmdo_close callback.  Can be NULL, and
 *	must be NULL for a subsidiary with case type SW_CASE_NONE (such
 *	as a response subsidiary).
 *
 * swsub_case_verify
 *	This is called during _fmd_init of the host module.  The host module
 *	iterates over all cases that it owns and calls the verify function
 *	for the real owner which may choose to close cases if they no longer
 *	apply.  Can be NULL, and must be NULL for a subsidiary with case
 *	type SW_CASE_NONE.
 */

/*
 * Case types.  A diagnosis subsidiary names the single type of case it
 * solves; SW_CASE_NONE is for a subsidiary (such as a response subsidiary)
 * that solves no cases.
 *
 * sw_casetype values are persisted to checkpoints - do not change values.
 * Every enumerator is assigned explicitly so that adding or reordering
 * members cannot silently renumber a value that has already been written
 * to disk;  give any new case type the next unused value.
 */
enum sw_casetype {
	SW_CASE_NONE = 0x0ca5e000,
	SW_CASE_SMF = 0x0ca5e001,
	SW_CASE_PANIC = 0x0ca5e002
};

/*
 * Return values for a subsidiary's swsub_init function (the int returned
 * through the swsub_init member of struct sw_subinfo).  The swsub_fini
 * entry point will only be called for subsidiaries that returned
 * SW_SUB_INIT_SUCCESS on init.
 */
#define	SW_SUB_INIT_SUCCESS		0
#define	SW_SUB_INIT_FAIL_VOLUNTARY	1	/* chose not to init */
#define	SW_SUB_INIT_FAIL_ERROR		2	/* error prevented init */

/*
 * Types of the optional per-subsidiary case callbacks stored in the
 * swsub_case_close and swsub_case_verify members of struct sw_subinfo.
 * NOTE(review): the meaning of the int returned by a verify function is not
 * described in this header - confirm against the host module.
 */
typedef void swsub_case_close_func_t(fmd_hdl_t *, fmd_case_t *);
typedef int sw_case_vrfy_func_t(fmd_hdl_t *, fmd_case_t *);

/*
 * Description of one subsidiary diagnosis engine or response agent, as
 * listed by its host module.
 */
struct sw_subinfo {
	/* unique subsidiary name; by convention also its fmd buffer prefix */
	const char *swsub_name;
	/* case type solved; SW_CASE_NONE for a response subsidiary */
	enum sw_casetype swsub_casetype;
	/* init function, must not be NULL; returns a SW_SUB_INIT_* value */
	int (*swsub_init)(fmd_hdl_t *, id_t, const struct sw_disp **, int *);
	/* called when the host module _fmd_fini is called; may be NULL */
	void (*swsub_fini)(fmd_hdl_t *);
	/* called for expired timers of this subsidiary; may be NULL */
	void (*swsub_timeout)(fmd_hdl_t *, id_t, void *);
	/* called on fmdo_close of a case owned by this subsidiary, or NULL */
	swsub_case_close_func_t *swsub_case_close;
	/* called for owned cases during host _fmd_init, or NULL */
	sw_case_vrfy_func_t *swsub_case_verify;
};

/*
 * List sw_subinfo for each subsidiary diagnosis and response "module" here.
 * These are declarations only.
 * NOTE(review): presumably each structure is defined in the source of the
 * subsidiary it describes - confirm.
 */
extern const struct sw_subinfo smf_diag_info;
extern const struct sw_subinfo smf_response_info;
extern const struct sw_subinfo panic_diag_info;

/*
 * Timers - as per the fmd module API but with an additional id_t argument
 * specifying the unique id of the subsidiary installing the timer (provided
 * to the subsidiary in its swsub_init call).
 *
 * NOTE(review): the id_t returned by sw_timer_install is presumably the
 * timer id to quote later to sw_timer_remove, and the two id_t arguments of
 * sw_timer_remove are presumably the subsidiary id followed by that timer
 * id - confirm against the implementation.
 */
extern id_t sw_timer_install(fmd_hdl_t *, id_t, void *, fmd_event_t *,
    hrtime_t);
extern void sw_timer_remove(fmd_hdl_t *, id_t, id_t);

/*
 * The software-diagnosis subsidiaries can open and solve cases; to do so
 * they must use the following wrappers to the usual fmd module API case
 * management functions.  We need this so that a subsidiary can iterate
 * over *its* cases (fmd_case_next would iterate over those of other
 * subsidiaries), receive in the subsidiary a callback when a case it opened
 * is closed, etc.  The subsidiary can use other fmd module API members
 * for case management, such as fmd_case_add_ereport.
 *
 * Each subsidiary opens cases of its own unique type, identified by
 * the sw_casetype enumeration.  The values used in this enumeration
 * must never change - they are written to checkpoint state.
 *
 * swde_case_open
 *	Opens a new case of the correct subsidiary type for the given
 *	subsidiary id.  If a uuid string is provided then open a case
 *	with that uuid using fmd_case_open_uuid, allowing case uuid
 *	to match some relevant uuid that was received in one of the
 *	events that has led us to open this case.
 *
 *	If the subsidiary wishes to associate some persistent
 *	case data with the new case then it can fmd_hdl_alloc and complete a
 *	suitably-packed serialization structure and include a pointer to it
 *	in the call to swde_case_open together with the structure size and
 *	structure version.  The framework will create a new fmd buffer (named
 *	for you, based on the case type) and write the structure out to disk;
 *	when the module or fmd is restarted this structure is restored from
 *	disk for you and reassociated with the case - use swde_case_data to
 *	retrieve a pointer to it.
 *
 * swde_case_first, swde_case_next
 *	A subsidiary DE can iterate over its cases using swde_case_first and
 *	swde_case_next.  For swde_case_first quote the subsidiary id;
 *	for swde_case_next quote the last case returned.
 *
 * swde_case_data
 *	Returns a pointer to the previously-serialized case data, and fills
 *	a uint32_t with the version of that serialized data.
 *
 * swde_case_data_write
 *	Whenever a subsidiary modifies its persistent data structure
 *	it must call swde_case_data_write to indicate that the associated
 *	fmd buffer is dirty and needs to be rewritten.
 *
 * swde_case_data_upgrade
 *	If the subsidiary ever revs its persistent structure it needs to call
 *	swde_case_data_upgrade to register the new version and structure size,
 *	and write the structure out to a reallocated fmd buffer;  the old
 *	case data structure (if any) will be freed.  A subsidiary may use
 *	this interface to migrate old persistence structures restored from
 *	checkpoint - swde_case_data will return a version number below the
 *	current.
 */

/*
 * Case management wrappers for software-diagnosis subsidiaries.
 *
 * NOTE(review): the argument roles noted on each prototype are inferred
 * from the argument types - confirm against the implementation.
 */

/* handle, subsidiary id, case uuid string, data version, data, data size */
extern fmd_case_t *swde_case_open(fmd_hdl_t *, id_t, char *, uint32_t,
    void *, size_t);
/* handle, subsidiary id */
extern fmd_case_t *swde_case_first(fmd_hdl_t *, id_t);
/* handle, last case returned */
extern fmd_case_t *swde_case_next(fmd_hdl_t *, fmd_case_t *);
/* handle, case, where to store the version of the serialized data */
extern void *swde_case_data(fmd_hdl_t *, fmd_case_t *, uint32_t *);
/* handle, case whose persistent data has been modified */
extern void swde_case_data_write(fmd_hdl_t *, fmd_case_t *);
/* handle, case, new data version, new data, new data size */
extern void swde_case_data_upgrade(fmd_hdl_t *, fmd_case_t *, uint32_t,
    void *, size_t);

#ifdef	__cplusplus
}
#endif

#endif	/* _SW_H */