Mercurial > illumos > illumos-gate
annotate usr/src/cmd/fm/fmd/common/fmd_self.c @ 12979:ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
PSARC/2009/618 snmp-notify: SNMP Notification Daemon for Software Events
PSARC/2009/619 smtp-notify: Email Notification Daemon for Software Events
PSARC/2010/225 fmd for non-global Solaris zones
PSARC/2010/226 Solaris Instance UUID
PSARC/2010/227 nvlist_nvflag(3NVPAIR)
PSARC/2010/228 libfmevent additions
PSARC/2010/257 sysevent_evc_setpropnvl and sysevent_evc_getpropnvl
PSARC/2010/265 FMRI and FMA Event Stability, 'ireport' category 1 event class, and the 'sw' FMRI scheme
PSARC/2010/278 FMA/SMF integration: instance state transitions
PSARC/2010/279 Modelling panics within FMA
PSARC/2010/290 logadm.conf upgrade
6392476 fmdump needs to pretty-print
6393375 userland ereport/ireport event generation interfaces
6445732 Add email notification agent for FMA and software events
6804168 RFE: Allow an efficient means to monitor SMF services status changes
6866661 scf_values_destroy(3SCF) will segfault if it is passed NULL
6884709 Add snmp notification agent for FMA and software events
6884712 Add private interface to tap into libfmd_msg macro expansion capabilities
6897919 fmd to run in a non-global zone
6897937 fmd use of non-private doors is not safe
6900081 add a UUID to Solaris kernel image for use in crashdump identification
6914884 model panic events as a defect diagnosis in FMA
6944862 fmd_case_open_uuid, fmd_case_uuisresolved, fmd_nvl_create_defect
6944866 log legacy sysevents in fmd
6944867 enumerate svc scheme in topo
6944868 software-diagnosis and software-response fmd modules
6944870 model SMF maintenance state as a defect diagnosis in FMA
6944876 savecore runs in foreground for systems with zfs root and dedicated dump
6965796 Implement notification parameters for SMF state transitions and FMA events
6968287 SUN-FM-MIB.mib needs to be updated to reflect Oracle information
6972331 logadm.conf upgrade PSARC/2010/290
author | Gavin Maltby <gavin.maltby@oracle.com> |
---|---|
date | Fri, 30 Jul 2010 17:04:17 +1000 |
parents | c13e2db06244 |
children |
rev | line source |
---|---|
0 | 1 /* |
2 * CDDL HEADER START | |
3 * | |
4 * The contents of this file are subject to the terms of the | |
3323 | 5 * Common Development and Distribution License (the "License"). |
6 * You may not use this file except in compliance with the License. | |
0 | 7 * |
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 * or http://www.opensolaris.org/os/licensing. | |
10 * See the License for the specific language governing permissions | |
11 * and limitations under the License. | |
12 * | |
13 * When distributing Covered Code, include this CDDL HEADER in each | |
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 * If applicable, add the following below this CDDL HEADER, with the | |
16 * fields enclosed by brackets "[]" replaced with your own identifying | |
17 * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 * | |
19 * CDDL HEADER END | |
20 */ | |
1193 | 21 |
0 | 22 /* |
12979
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11416
diff
changeset
|
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. |
0 | 24 */ |
25 | |
26 #include <sys/fm/protocol.h> | |
27 | |
28 #include <fmd_api.h> | |
29 #include <fmd_subr.h> | |
30 #include <fmd_string.h> | |
31 #include <fmd_protocol.h> | |
32 #include <fmd_module.h> | |
33 #include <fmd_error.h> | |
34 | |
35 static struct { | |
36 fmd_stat_t nosub; | |
37 fmd_stat_t module; | |
38 } self_stats = { | |
39 { "nosub", FMD_TYPE_UINT64, "event classes with no subscribers seen" }, | |
40 { "module", FMD_TYPE_UINT64, "error events received from fmd modules" }, | |
41 }; | |
42 | |
/*
 * Per-case data that this module passes to fmd_case_open().  A case is
 * identified by the pair (sc_kind, sc_name).
 */
typedef struct self_case {
	enum {
		SC_CLASS,	/* sc_name is an event class */
		SC_MODULE	/* sc_name is an fmd module name */
	} sc_kind;
	char *sc_name;		/* private copy made by self_case_create() */
} self_case_t;
47 | |
48 static self_case_t * | |
49 self_case_create(fmd_hdl_t *hdl, int kind, const char *name) | |
50 { | |
51 self_case_t *scp = fmd_hdl_alloc(hdl, sizeof (self_case_t), FMD_SLEEP); | |
52 | |
53 scp->sc_kind = kind; | |
54 scp->sc_name = fmd_hdl_strdup(hdl, name, FMD_SLEEP); | |
55 | |
56 return (scp); | |
57 } | |
58 | |
59 static void | |
60 self_case_destroy(fmd_hdl_t *hdl, self_case_t *scp) | |
61 { | |
62 fmd_hdl_strfree(hdl, scp->sc_name); | |
63 fmd_hdl_free(hdl, scp, sizeof (self_case_t)); | |
64 } | |
65 | |
66 static fmd_case_t * | |
67 self_case_lookup(fmd_hdl_t *hdl, int kind, const char *name) | |
68 { | |
69 fmd_case_t *cp = NULL; | |
70 | |
71 while ((cp = fmd_case_next(hdl, cp)) != NULL) { | |
72 self_case_t *scp = fmd_case_getspecific(hdl, cp); | |
73 if (scp->sc_kind == kind && strcmp(scp->sc_name, name) == 0) | |
74 break; | |
75 } | |
76 | |
77 return (cp); | |
78 } | |
79 | |
80 /*ARGSUSED*/ | |
81 static void | |
82 self_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) | |
83 { | |
84 fmd_case_t *cp; | |
85 nvlist_t *flt, *mod; | |
86 char *name; | |
87 int err = 0; | |
88 | |
89 /* | |
90 * If we get an error report from another fmd module, then create a | |
91 * case for the module and add the ereport to it. The error is either | |
92 * from fmd_hdl_error() or from fmd_api_error(). If it is the latter, | |
93 * fmd_module_error() will send another event of class EFMD_MOD_FAIL | |
94 * when the module has failed, at which point we can solve the case. | |
95 * We can also close the case on EFMD_MOD_CONF (bad config file). | |
96 */ | |
97 if (strcmp(class, fmd_errclass(EFMD_MODULE)) == 0 && | |
98 nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, &mod) == 0 && | |
99 nvlist_lookup_string(mod, FM_FMRI_FMD_NAME, &name) == 0) { | |
100 | |
101 if ((cp = self_case_lookup(hdl, SC_MODULE, name)) == NULL) { | |
102 cp = fmd_case_open(hdl, | |
103 self_case_create(hdl, SC_MODULE, name)); | |
104 } | |
105 | |
106 fmd_case_add_ereport(hdl, cp, ep); | |
107 self_stats.module.fmds_value.ui64++; | |
108 (void) nvlist_lookup_int32(nvl, FMD_ERR_MOD_ERRNO, &err); | |
109 | |
110 if (err != EFMD_MOD_FAIL && err != EFMD_MOD_CONF) | |
111 return; /* module is still active, so keep case open */ | |
112 | |
113 if (fmd_case_solved(hdl, cp)) | |
114 return; /* case is already closed but error in _fini */ | |
115 | |
116 class = err == EFMD_MOD_FAIL ? FMD_FLT_MOD : FMD_FLT_CONF; | |
3323 | 117 flt = fmd_protocol_fault(class, 100, mod, NULL, NULL, NULL); |
0 | 118 |
119 fmd_case_add_suspect(hdl, cp, flt); | |
120 fmd_case_solve(hdl, cp); | |
121 | |
122 return; | |
123 } | |
124 | |
125 /* | |
126 * If we get an I/O DDI ereport, drop it for now until the I/O DE is | |
127 * implemented and integrated. Existing drivers in O/N have bugs that | |
128 * will trigger these and we don't want this producing FMD_FLT_NOSUB. | |
129 */ | |
130 if (strncmp(class, "ereport.io.ddi.", strlen("ereport.io.ddi.")) == 0) | |
131 return; /* if we got a DDI ereport, drop it for now */ | |
132 | |
133 /* | |
134 * If we get any other type of event then it is of a class for which | |
135 * there are no subscribers. Some of these correspond to internal fmd | |
136 * errors, which we ignore. Otherwise we keep one case per class and | |
137 * use it to produce a message indicating that something is awry. | |
138 */ | |
1193 | 139 if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 || |
140 strcmp(class, FM_LIST_ISOLATED_CLASS) == 0 || | |
7275 | 141 strcmp(class, FM_LIST_UPDATED_CLASS) == 0 || |
142 strcmp(class, FM_LIST_RESOLVED_CLASS) == 0 || | |
11416
c13e2db06244
6679339 undiag messages should be converted to a more useful event
Stephen Hanson <Stephen.Hanson@Sun.COM>
parents:
7275
diff
changeset
|
143 strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 || |
c13e2db06244
6679339 undiag messages should be converted to a more useful event
Stephen Hanson <Stephen.Hanson@Sun.COM>
parents:
7275
diff
changeset
|
144 strncmp(class, FM_FAULT_CLASS, strlen(FM_FAULT_CLASS)) == 0 || |
c13e2db06244
6679339 undiag messages should be converted to a more useful event
Stephen Hanson <Stephen.Hanson@Sun.COM>
parents:
7275
diff
changeset
|
145 strncmp(class, FM_DEFECT_CLASS, strlen(FM_DEFECT_CLASS)) == 0) |
7275 | 146 return; /* if no agents are present just drop list.* */ |
0 | 147 |
1193 | 148 if (strncmp(class, FMD_ERR_CLASS, FMD_ERR_CLASS_LEN) == 0) |
0 | 149 return; /* if fmd itself produced the error just drop it */ |
150 | |
1193 | 151 if (strncmp(class, FMD_RSRC_CLASS, FMD_RSRC_CLASS_LEN) == 0) |
152 return; /* if fmd itself produced the event just drop it */ | |
153 | |
7171 | 154 if (strncmp(class, SYSEVENT_RSRC_CLASS, SYSEVENT_RSRC_CLASS_LEN) == 0) |
155 return; /* sysvent resources are auto generated by fmd */ | |
156 | |
0 | 157 if (self_case_lookup(hdl, SC_CLASS, class) != NULL) |
158 return; /* case is already open against this class */ | |
159 | |
12979
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11416
diff
changeset
|
160 if (strncmp(class, FM_IREPORT_CLASS ".", |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11416
diff
changeset
|
161 sizeof (FM_IREPORT_CLASS)) == 0) |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11416
diff
changeset
|
162 return; /* no subscriber required for ireport.* */ |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11416
diff
changeset
|
163 |
0 | 164 cp = fmd_case_open(hdl, self_case_create(hdl, SC_CLASS, class)); |
165 fmd_case_add_ereport(hdl, cp, ep); | |
166 self_stats.nosub.fmds_value.ui64++; | |
167 | |
3323 | 168 flt = fmd_protocol_fault(FMD_FLT_NOSUB, 100, NULL, NULL, NULL, NULL); |
11416
c13e2db06244
6679339 undiag messages should be converted to a more useful event
Stephen Hanson <Stephen.Hanson@Sun.COM>
parents:
7275
diff
changeset
|
169 (void) nvlist_add_string(flt, "nosub_class", class); |
0 | 170 fmd_case_add_suspect(hdl, cp, flt); |
171 fmd_case_solve(hdl, cp); | |
172 } | |
173 | |
174 static void | |
175 self_close(fmd_hdl_t *hdl, fmd_case_t *cp) | |
176 { | |
177 self_case_destroy(hdl, fmd_case_getspecific(hdl, cp)); | |
178 } | |
179 | |
180 static const fmd_hdl_ops_t self_ops = { | |
181 self_recv, /* fmdo_recv */ | |
182 NULL, /* fmdo_timeout */ | |
183 self_close, /* fmdo_close */ | |
184 NULL, /* fmdo_stats */ | |
185 NULL, /* fmdo_gc */ | |
186 }; | |
187 | |
188 void | |
189 self_init(fmd_hdl_t *hdl) | |
190 { | |
191 fmd_module_t *mp = (fmd_module_t *)hdl; /* see below */ | |
192 | |
193 fmd_hdl_info_t info = { | |
194 "Fault Manager Self-Diagnosis", "1.0", &self_ops, NULL | |
195 }; | |
196 | |
197 /* | |
198 * Unlike other modules, fmd-self-diagnosis has some special needs that | |
199 * fall outside of what we want in the module API. Manually disable | |
200 * checkpointing for this module by tweaking the mod_stats values. | |
201 * The self-diagnosis world relates to fmd's running state and modules | |
202 * which all change when it restarts, so don't bother w/ checkpointing. | |
203 */ | |
204 (void) pthread_mutex_lock(&mp->mod_stats_lock); | |
205 mp->mod_stats->ms_ckpt_save.fmds_value.bool = FMD_B_FALSE; | |
206 mp->mod_stats->ms_ckpt_restore.fmds_value.bool = FMD_B_FALSE; | |
207 (void) pthread_mutex_unlock(&mp->mod_stats_lock); | |
208 | |
209 if (fmd_hdl_register(hdl, FMD_API_VERSION, &info) != 0) | |
210 return; /* failed to register with fmd */ | |
211 | |
212 (void) fmd_stat_create(hdl, FMD_STAT_NOALLOC, sizeof (self_stats) / | |
213 sizeof (fmd_stat_t), (fmd_stat_t *)&self_stats); | |
214 } | |
215 | |
216 void | |
217 self_fini(fmd_hdl_t *hdl) | |
218 { | |
219 fmd_case_t *cp = NULL; | |
220 | |
221 while ((cp = fmd_case_next(hdl, cp)) != NULL) | |
222 self_case_destroy(hdl, fmd_case_getspecific(hdl, cp)); | |
223 } |