Mercurial > illumos > illumos-gate
diff usr/src/cmd/fm/modules/common/sw-diag-response/common/sw_main_cmn.c @ 12979:ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
PSARC/2009/618 snmp-notify: SNMP Notification Daemon for Software Events
PSARC/2009/619 smtp-notify: Email Notification Daemon for Software Events
PSARC/2010/225 fmd for non-global Solaris zones
PSARC/2010/226 Solaris Instance UUID
PSARC/2010/227 nvlist_nvflag(3NVPAIR)
PSARC/2010/228 libfmevent additions
PSARC/2010/257 sysevent_evc_setpropnvl and sysevent_evc_getpropnvl
PSARC/2010/265 FMRI and FMA Event Stabilty, 'ireport' category 1 event class, and the 'sw' FMRI scheme
PSARC/2010/278 FMA/SMF integration: instance state transitions
PSARC/2010/279 Modelling panics within FMA
PSARC/2010/290 logadm.conf upgrade
6392476 fmdump needs to pretty-print
6393375 userland ereport/ireport event generation interfaces
6445732 Add email notification agent for FMA and software events
6804168 RFE: Allow an efficient means to monitor SMF services status changes
6866661 scf_values_destroy(3SCF) will segfault if is passed NULL
6884709 Add snmp notification agent for FMA and software events
6884712 Add private interface to tap into libfmd_msg macro expansion capabilities
6897919 fmd to run in a non-global zone
6897937 fmd use of non-private doors is not safe
6900081 add a UUID to Solaris kernel image for use in crashdump identification
6914884 model panic events as a defect diagnosis in FMA
6944862 fmd_case_open_uuid, fmd_case_uuisresolved, fmd_nvl_create_defect
6944866 log legacy sysevents in fmd
6944867 enumerate svc scheme in topo
6944868 software-diagnosis and software-response fmd modules
6944870 model SMF maintenance state as a defect diagnosis in FMA
6944876 savecore runs in foreground for systems with zfs root and dedicated dump
6965796 Implement notification parameters for SMF state transitions and FMA events
6968287 SUN-FM-MIB.mib needs to be updated to reflect Oracle information
6972331 logadm.conf upgrade PSARC/2010/290
author | Gavin Maltby <gavin.maltby@oracle.com> |
---|---|
date | Fri, 30 Jul 2010 17:04:17 +1000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/cmd/fm/modules/common/sw-diag-response/common/sw_main_cmn.c Fri Jul 30 17:04:17 2010 +1000 @@ -0,0 +1,474 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* + * Code shared by software-diagnosis and software-response modules. + * The fmd module linkage info for the two modules lives in swde_main.c + * (for software-diagnosis) and swrp_main.c (for software-response). + */ + +#include "../common/sw_impl.h" + +/* + * Each subsidiary that is hosted is assigned a unique subsidiary id. These + * macros convert between the id of a subsidiary and the index used in keeping + * track of subsidiaries. Outside of this file these ids should remain + * opaque. + */ +#define ID2IDX(id) ((int)((id) & 0xff0000) >> 16) +#define IDX2ID(i) ((id_t)((i) << 16) | 0x1d000000) + +#define SUBIDVALID(msinfo, id) (((int)(id) & 0xff00ffff) == 0x1d000000 && \ + ID2IDX(id) < (msinfo)->swms_dispcnt) + +static struct { + fmd_stat_t sw_recv_total; + fmd_stat_t sw_recv_match; + fmd_stat_t sw_recv_callback; +} sw_stats = { + { "sw_recv_total", FMD_TYPE_UINT64, + "total events received" }, + { "sw_recv_match", FMD_TYPE_UINT64, + "events matching some subsidiary" }, + { "sw_recv_callback", FMD_TYPE_UINT64, + "callbacks to all subsidiaries" }, +}; + +#define BUMPSTAT(stat) sw_stats.stat.fmds_value.ui64++ +#define BUMPSTATN(stat, n) sw_stats.stat.fmds_value.ui64 += (n) + +/* + * ========================== Event Receipt ================================= + * + * The fmdo_recv entry point. See which sub de/response agents have a + * matching subscription and callback for the first match from each. + * The sub de/response agents should dispatch *all* their subscriptions + * via their registered dispatch table, including things like list.repaired. + */ +void +sw_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) +{ + struct sw_modspecific *msinfo; + int calls = 0; + int mod; + + BUMPSTAT(sw_recv_total); + + msinfo = (struct sw_modspecific *)fmd_hdl_getspecific(hdl); + + /* + * For each sub module that has a matching class pattern call the + * registered callback for that sub DE. Only one match per sub module + * is allowed (the first match in its table, others are not checked). + */ + for (mod = 0; mod < msinfo->swms_dispcnt; mod++) { + const struct sw_disp *dp; + sw_dispfunc_t *dispf = NULL; + + for (dp = (*msinfo->swms_disptbl)[mod]; + dp != NULL && dp->swd_classpat != NULL; dp++) { + if (fmd_nvl_class_match(hdl, nvl, dp->swd_classpat)) { + dispf = dp->swd_func; + break; + } + } + if (dispf != NULL) { + calls++; + (*dispf)(hdl, ep, nvl, class, dp->swd_arg); + } + } + + BUMPSTAT(sw_recv_match); + if (calls) + BUMPSTATN(sw_recv_callback, calls); +} + +/* + * ========================== Timers ======================================== + * + * A subsidiary can install a timer; it must pass an additional argument + * identifying itself so that we can hand off to the appropriate + * swsub_timeout function in the fmdo_timeout entry point when the timer fires. + */ +id_t +sw_timer_install(fmd_hdl_t *hdl, id_t who, void *arg, fmd_event_t *ep, + hrtime_t hrt) +{ + struct sw_modspecific *msinfo; + const struct sw_subinfo **subinfo; + const struct sw_subinfo *sip; + int slot, chosen = -1; + id_t timerid; + + msinfo = (struct sw_modspecific *)fmd_hdl_getspecific(hdl); + if (!SUBIDVALID(msinfo, who)) + fmd_hdl_abort(hdl, "sw_timer_install: invalid subid %d\n", who); + + subinfo = *msinfo->swms_subinfo; + sip = subinfo[ID2IDX(who)]; + + if (sip-> swsub_timeout == NULL) + fmd_hdl_abort(hdl, "sw_timer_install: no swsub_timeout\n"); + + /* + * Look for a slot. Module entry points are single-threaded + * in nature, but if someone installs a timer from a door + * service function we're contended. + */ + (void) pthread_mutex_lock(&msinfo->swms_timerlock); + for (slot = 0; slot < SW_TIMER_MAX; slot++) { + if (msinfo->swms_timers[slot].swt_state != SW_TMR_INUSE) { + chosen = slot; + break; + } + } + + if (chosen == -1) + fmd_hdl_abort(hdl, "timer slots exhausted\n"); + + msinfo->swms_timers[chosen].swt_state = SW_TMR_INUSE; + msinfo->swms_timers[chosen].swt_ownerid = who; + msinfo->swms_timers[chosen].swt_timerid = timerid = + fmd_timer_install(hdl, arg, ep, hrt); + + (void) pthread_mutex_unlock(&msinfo->swms_timerlock); + + return (timerid); +} + +/* + * Look for a timer installed by a given subsidiary matching timerid. + */ +static int +subtimer_find(struct sw_modspecific *msinfo, id_t who, id_t timerid) +{ + int slot; + + for (slot = 0; slot < SW_TIMER_MAX; slot++) { + if (msinfo->swms_timers[slot].swt_state == SW_TMR_INUSE && + (who == -1 || + msinfo->swms_timers[slot].swt_ownerid == who) && + msinfo->swms_timers[slot].swt_timerid == timerid) + return (slot); + } + + return (-1); +} + +void +sw_timer_remove(fmd_hdl_t *hdl, id_t who, id_t timerid) +{ + struct sw_modspecific *msinfo; + const struct sw_subinfo **subinfo; + const struct sw_subinfo *sip; + int slot; + + msinfo = (struct sw_modspecific *)fmd_hdl_getspecific(hdl); + if (!SUBIDVALID(msinfo, who)) + fmd_hdl_abort(hdl, "sw_timer_remove: invalid subid\n"); + + subinfo = *msinfo->swms_subinfo; + sip = subinfo[ID2IDX(who)]; + + (void) pthread_mutex_lock(&msinfo->swms_timerlock); + if ((slot = subtimer_find(msinfo, who, timerid)) == -1) + fmd_hdl_abort(hdl, "sw_timer_remove: timerid %d not found " + "for %s\n", timerid, sip->swsub_name); + fmd_timer_remove(hdl, timerid); + msinfo->swms_timers[slot].swt_state = SW_TMR_RMVD; + (void) pthread_mutex_unlock(&msinfo->swms_timerlock); +} + +/* + * The fmdo_timeout entry point. + */ +void +sw_timeout(fmd_hdl_t *hdl, id_t timerid, void *arg) +{ + struct sw_modspecific *msinfo; + const struct sw_subinfo **subinfo; + const struct sw_subinfo *sip; + id_t owner; + int slot; + + msinfo = (struct sw_modspecific *)fmd_hdl_getspecific(hdl); + + (void) pthread_mutex_lock(&msinfo->swms_timerlock); + if ((slot = subtimer_find(msinfo, -1, timerid)) == -1) + fmd_hdl_abort(hdl, "sw_timeout: timerid %d not found\n"); + (void) pthread_mutex_unlock(&msinfo->swms_timerlock); + + owner = msinfo->swms_timers[slot].swt_ownerid; + if (!SUBIDVALID(msinfo, owner)) + fmd_hdl_abort(hdl, "sw_timeout: invalid subid\n"); + + subinfo = *msinfo->swms_subinfo; + sip = subinfo[ID2IDX(owner)]; + + sip->swsub_timeout(hdl, timerid, arg); +} + +/* + * ========================== sw_subinfo access ============================= + */ + +enum sw_casetype +sw_id_to_casetype(fmd_hdl_t *hdl, id_t who) +{ + struct sw_modspecific *msinfo; + const struct sw_subinfo **subinfo; + const struct sw_subinfo *sip; + + msinfo = (struct sw_modspecific *)fmd_hdl_getspecific(hdl); + if (!SUBIDVALID(msinfo, who)) + fmd_hdl_abort(hdl, "sw_id_to_casetype: invalid subid %d\n", + who); + + subinfo = *msinfo->swms_subinfo; + sip = subinfo[ID2IDX(who)]; + + if ((sip->swsub_casetype & SW_CASE_NONE) != SW_CASE_NONE) + fmd_hdl_abort(hdl, "sw_id_to_casetype: bad case type %d " + "for %s\n", sip->swsub_casetype, sip->swsub_name); + + return (sip->swsub_casetype); +} + +/* + * Given a case type lookup the struct sw_subinfo for the subsidiary + * that opens cases of that type. + */ +static const struct sw_subinfo * +sw_subinfo_bycase(fmd_hdl_t *hdl, enum sw_casetype type) +{ + struct sw_modspecific *msinfo; + const struct sw_subinfo **subinfo; + const struct sw_subinfo *sip; + int i; + + msinfo = (struct sw_modspecific *)fmd_hdl_getspecific(hdl); + + subinfo = *msinfo->swms_subinfo; + for (i = 0; i < SW_SUB_MAX; i++) { + sip = subinfo[i]; + if (sip->swsub_casetype == type) + return (sip); + } + + return (NULL); +} + +/* + * Find the case close function for the given case type; can be NULL. + */ +swsub_case_close_func_t * +sw_sub_case_close_func(fmd_hdl_t *hdl, enum sw_casetype type) +{ + const struct sw_subinfo *sip; + + if ((sip = sw_subinfo_bycase(hdl, type)) == NULL) + fmd_hdl_abort(hdl, "sw_sub_case_close_func: case type " + "%d not found\n", type); + + return (sip->swsub_case_close); +} + +/* + * Find the case verify function for the given case type; can be NULL. + */ +sw_case_vrfy_func_t * +sw_sub_case_vrfy_func(fmd_hdl_t *hdl, enum sw_casetype type) +{ + const struct sw_subinfo *sip; + + if ((sip = sw_subinfo_bycase(hdl, type)) == NULL) + fmd_hdl_abort(hdl, "sw_sub_case_vrfy_func: case type " + "%d not found\n", type); + + return (sip->swsub_case_verify); +} + +/* + * ========================== Initialization ================================ + * + * The two modules - software-diagnosis and software-response - call + * sw_fmd_init from their _fmd_init entry points. + */ + +static void +sw_add_callbacks(fmd_hdl_t *hdl, const char *who, + const struct sw_disp *dp, int nelem, struct sw_modspecific *msinfo) +{ + int i; + + (*msinfo->swms_disptbl)[msinfo->swms_dispcnt++] = dp; + + if (dp == NULL) + return; /* subsidiary failed init */ + + /* check that the nelem'th entry is the NULL termination */ + if (dp[nelem - 1].swd_classpat != NULL || + dp[nelem - 1].swd_func != NULL || dp[nelem - 1].swd_arg != NULL) + fmd_hdl_abort(hdl, "subsidiary %s dispatch table not NULL-" + "terminated\n", who); + + /* now validate the entries; we allow NULL handlers */ + for (i = 0; i < nelem - 1; i++) { + if (dp[i].swd_classpat == NULL) + fmd_hdl_abort(hdl, "subsidiary %s dispatch table entry " + "%d has a NULL pattern or function\n", who, i); + } + +} + +int +sw_fmd_init(fmd_hdl_t *hdl, const fmd_hdl_info_t *hdlinfo, + const struct sw_subinfo *(*subsid)[SW_SUB_MAX]) +{ + struct sw_modspecific *msinfo; + int i; + + if (fmd_hdl_register(hdl, FMD_API_VERSION, hdlinfo) != 0) + return (0); + + if (fmd_prop_get_int32(hdl, "enable") != B_TRUE) { + fmd_hdl_debug(hdl, "%s disabled though .conf file setting\n", + hdlinfo->fmdi_desc); + fmd_hdl_unregister(hdl); + return (0); + } + + msinfo = fmd_hdl_zalloc(hdl, sizeof (*msinfo), FMD_SLEEP); + + msinfo->swms_subinfo = subsid; + msinfo->swms_disptbl = fmd_hdl_zalloc(hdl, + SW_SUB_MAX * sizeof (struct sw_disp *), FMD_SLEEP); + + (void) pthread_mutex_init(&msinfo->swms_timerlock, NULL); + + for (i = 0; i < SW_TIMER_MAX; i++) + msinfo->swms_timers[i].swt_state = SW_TMR_UNTOUCHED; + + fmd_hdl_setspecific(hdl, (void *)msinfo); + + (void) fmd_stat_create(hdl, FMD_STAT_NOALLOC, sizeof (sw_stats) / + sizeof (fmd_stat_t), (fmd_stat_t *)&sw_stats); + + /* + * Initialize subsidiaries. Each must make any subscription + * requests it needs and return a pointer to a NULL-terminated + * callback dispatch table and an indication of the number of + * entries in that table including the NULL termination entry. + */ + for (i = 0; i < SW_SUB_MAX; i++) { + const struct sw_subinfo *sip = (*subsid)[i]; + const struct sw_disp *dp; + char dbgbuf[80]; + int nelem = -1; + int initrslt; + + if (!sip || sip->swsub_name == NULL) + break; + + initrslt = (*sip->swsub_init)(hdl, IDX2ID(i), &dp, &nelem); + + (void) snprintf(dbgbuf, sizeof (dbgbuf), + "subsidiary %d (id 0x%lx) '%s'", + i, IDX2ID(i), sip->swsub_name); + + switch (initrslt) { + case SW_SUB_INIT_SUCCESS: + if (dp == NULL || nelem < 1) + fmd_hdl_abort(hdl, "%s returned dispatch " + "table 0x%p and nelem %d\n", + dbgbuf, dp, nelem); + + fmd_hdl_debug(hdl, "%s initialized\n", dbgbuf); + sw_add_callbacks(hdl, sip->swsub_name, dp, nelem, + msinfo); + break; + + case SW_SUB_INIT_FAIL_VOLUNTARY: + fmd_hdl_debug(hdl, "%s chose not to initialize\n", + dbgbuf); + sw_add_callbacks(hdl, sip->swsub_name, NULL, -1, + msinfo); + break; + + case SW_SUB_INIT_FAIL_ERROR: + fmd_hdl_debug(hdl, "%s failed to initialize " + "because of an error\n", dbgbuf); + sw_add_callbacks(hdl, sip->swsub_name, NULL, -1, + msinfo); + break; + + default: + fmd_hdl_abort(hdl, "%s returned out-of-range result " + "%d\n", dbgbuf, initrslt); + break; + } + } + + return (1); +} + +void +sw_fmd_fini(fmd_hdl_t *hdl) +{ + const struct sw_subinfo **subinfo; + struct sw_modspecific *msinfo; + int i; + + msinfo = (struct sw_modspecific *)fmd_hdl_getspecific(hdl); + subinfo = *msinfo->swms_subinfo; + + (void) pthread_mutex_lock(&msinfo->swms_timerlock); + for (i = 0; i < SW_TIMER_MAX; i++) { + if (msinfo->swms_timers[i].swt_state != SW_TMR_INUSE) + continue; + + fmd_timer_remove(hdl, msinfo->swms_timers[i].swt_timerid); + msinfo->swms_timers[i].swt_state = SW_TMR_RMVD; + } + (void) pthread_mutex_unlock(&msinfo->swms_timerlock); + + (void) pthread_mutex_destroy(&msinfo->swms_timerlock); + + for (i = 0; i < msinfo->swms_dispcnt; i++) { + const struct sw_subinfo *sip = subinfo[i]; + + if ((*msinfo->swms_disptbl)[i] == NULL) + continue; /* swsub_init did not succeed */ + + if (sip->swsub_fini != NULL) + (*sip->swsub_fini)(hdl); + } + + fmd_hdl_free(hdl, msinfo->swms_disptbl, + SW_SUB_MAX * sizeof (struct sw_disp *)); + + fmd_hdl_setspecific(hdl, NULL); + fmd_hdl_free(hdl, msinfo, sizeof (*msinfo)); +}