Mercurial > illumos > illumos-gate
annotate usr/src/cmd/fm/fmd/common/fmd_sysevent.c @ 12979:ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
PSARC/2009/618 snmp-notify: SNMP Notification Daemon for Software Events
PSARC/2009/619 smtp-notify: Email Notification Daemon for Software Events
PSARC/2010/225 fmd for non-global Solaris zones
PSARC/2010/226 Solaris Instance UUID
PSARC/2010/227 nvlist_nvflag(3NVPAIR)
PSARC/2010/228 libfmevent additions
PSARC/2010/257 sysevent_evc_setpropnvl and sysevent_evc_getpropnvl
PSARC/2010/265 FMRI and FMA Event Stabilty, 'ireport' category 1 event class, and the 'sw' FMRI scheme
PSARC/2010/278 FMA/SMF integration: instance state transitions
PSARC/2010/279 Modelling panics within FMA
PSARC/2010/290 logadm.conf upgrade
6392476 fmdump needs to pretty-print
6393375 userland ereport/ireport event generation interfaces
6445732 Add email notification agent for FMA and software events
6804168 RFE: Allow an efficient means to monitor SMF services status changes
6866661 scf_values_destroy(3SCF) will segfault if is passed NULL
6884709 Add snmp notification agent for FMA and software events
6884712 Add private interface to tap into libfmd_msg macro expansion capabilities
6897919 fmd to run in a non-global zone
6897937 fmd use of non-private doors is not safe
6900081 add a UUID to Solaris kernel image for use in crashdump identification
6914884 model panic events as a defect diagnosis in FMA
6944862 fmd_case_open_uuid, fmd_case_uuisresolved, fmd_nvl_create_defect
6944866 log legacy sysevents in fmd
6944867 enumerate svc scheme in topo
6944868 software-diagnosis and software-response fmd modules
6944870 model SMF maintenance state as a defect diagnosis in FMA
6944876 savecore runs in foreground for systems with zfs root and dedicated dump
6965796 Implement notification parameters for SMF state transitions and FMA events
6968287 SUN-FM-MIB.mib needs to be updated to reflect Oracle information
6972331 logadm.conf upgrade PSARC/2010/290
author | Gavin Maltby <gavin.maltby@oracle.com> |
---|---|
date | Fri, 30 Jul 2010 17:04:17 +1000 |
parents | b91faef0c984 |
children |
rev | line source |
---|---|
0 | 1 /* |
2 * CDDL HEADER START | |
3 * | |
4 * The contents of this file are subject to the terms of the | |
2914
266e6e5b5218
6446415 sysevent-transport publishes out-of-sequence ereports after panic
stephh
parents:
1414
diff
changeset
|
5 * Common Development and Distribution License (the "License"). |
266e6e5b5218
6446415 sysevent-transport publishes out-of-sequence ereports after panic
stephh
parents:
1414
diff
changeset
|
6 * You may not use this file except in compliance with the License. |
0 | 7 * |
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 * or http://www.opensolaris.org/os/licensing. | |
10 * See the License for the specific language governing permissions | |
11 * and limitations under the License. | |
12 * | |
13 * When distributing Covered Code, include this CDDL HEADER in each | |
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 * If applicable, add the following below this CDDL HEADER, with the | |
16 * fields enclosed by brackets "[]" replaced with your own identifying | |
17 * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 * | |
19 * CDDL HEADER END | |
20 */ | |
1193 | 21 |
0 | 22 /* |
12979
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. |
0 | 24 */ |
25 | |
26 #include <sys/sysevent/eventdefs.h> | |
27 #include <sys/sysevent.h> | |
28 #include <sys/sysevent_impl.h> | |
29 #include <sys/fm/protocol.h> | |
30 #include <sys/sysmacros.h> | |
31 #include <sys/dumphdr.h> | |
32 #include <sys/dumpadm.h> | |
1414
b4126407ac5b
PSARC 2006/020 FMA for Athlon 64 and Opteron Processors
cindi
parents:
1193
diff
changeset
|
33 #include <sys/fm/util.h> |
0 | 34 |
35 #include <libsysevent.h> | |
36 #include <libnvpair.h> | |
37 #include <alloca.h> | |
38 #include <limits.h> | |
1193 | 39 #include <strings.h> |
0 | 40 #include <unistd.h> |
41 #include <fcntl.h> | |
1193 | 42 #include <errno.h> |
12979
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
43 #include <zone.h> |
0 | 44 |
45 #undef MUTEX_HELD | |
46 #undef RW_READ_HELD | |
47 #undef RW_WRITE_HELD | |
48 | |
1193 | 49 #include <fmd_api.h> |
50 #include <fmd_log.h> | |
51 #include <fmd_subr.h> | |
0 | 52 #include <fmd_dispq.h> |
7171 | 53 #include <fmd_dr.h> |
0 | 54 #include <fmd_module.h> |
7171 | 55 #include <fmd_protocol.h> |
0 | 56 #include <fmd_scheme.h> |
1193 | 57 #include <fmd_error.h> |
0 | 58 |
59 #include <fmd.h> | |
60 | |
1193 | 61 static char *sysev_channel; /* event channel to which we are subscribed */ |
62 static char *sysev_class; /* event class to which we are subscribed */ | |
63 static char *sysev_device; /* device path to use for replaying events */ | |
64 static char *sysev_sid; /* event channel subscriber identifier */ | |
65 static void *sysev_evc; /* event channel cookie from evc_bind */ | |
0 | 66 |
1193 | 67 static fmd_xprt_t *sysev_xprt; |
6081
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
68 static int sysev_xprt_refcnt; |
1193 | 69 static fmd_hdl_t *sysev_hdl; |
0 | 70 |
1193 | 71 static struct sysev_stats { |
72 fmd_stat_t dump_replay; | |
73 fmd_stat_t dump_lost; | |
74 fmd_stat_t bad_class; | |
75 fmd_stat_t bad_attr; | |
76 fmd_stat_t eagain; | |
77 } sysev_stats = { | |
78 { "dump_replay", FMD_TYPE_UINT64, "events replayed from dump device" }, | |
79 { "dump_lost", FMD_TYPE_UINT64, "events lost from dump device" }, | |
80 { "bad_class", FMD_TYPE_UINT64, "events dropped due to invalid class" }, | |
81 { "bad_attr", FMD_TYPE_UINT64, "events dropped due to invalid nvlist" }, | |
82 { "eagain", FMD_TYPE_UINT64, "events retried due to low memory" }, | |
83 }; | |
0 | 84 |
6081
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
85 static pthread_cond_t sysev_cv = PTHREAD_COND_INITIALIZER; |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
86 static pthread_mutex_t sysev_mutex = PTHREAD_MUTEX_INITIALIZER; |
2914
266e6e5b5218
6446415 sysevent-transport publishes out-of-sequence ereports after panic
stephh
parents:
1414
diff
changeset
|
87 static int sysev_replay_wait = 1; |
6081
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
88 static int sysev_exiting; |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
89 |
12979
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
90 static sysevent_subattr_t *subattr; |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
91 |
7171 | 92 /* |
93 * Entry point for legacy sysevents. This function is responsible for two | |
94 * things: passing off interesting events to the DR handler, and converting | |
95 * sysevents into resource events that modules can then subscribe to. | |
96 */ | |
97 static void | |
98 sysev_legacy(sysevent_t *sep) | |
99 { | |
100 const char *class = sysevent_get_class_name(sep); | |
101 const char *subclass = sysevent_get_subclass_name(sep); | |
102 char *fullclass; | |
103 size_t len; | |
104 nvlist_t *attr, *nvl; | |
105 hrtime_t hrt; | |
106 | |
107 /* notify the DR subsystem of the event */ | |
108 fmd_dr_event(sep); | |
109 | |
110 /* get the matching sysevent name */ | |
111 len = snprintf(NULL, 0, "%s%s.%s", SYSEVENT_RSRC_CLASS, | |
112 class, subclass); | |
113 fullclass = alloca(len + 1); | |
114 (void) snprintf(fullclass, len + 1, "%s%s.%s", | |
115 SYSEVENT_RSRC_CLASS, class, subclass); | |
116 | |
117 /* construct the event payload */ | |
118 (void) nvlist_xalloc(&nvl, NV_UNIQUE_NAME, &fmd.d_nva); | |
7243 | 119 if (sysevent_get_attr_list(sep, &attr) == 0) { |
7171 | 120 (void) nvlist_merge(nvl, attr, 0); |
7243 | 121 nvlist_free(attr); |
122 } | |
7171 | 123 |
124 /* | |
9967
e0258b956de2
6849551 Many duplicated ereports are delivered to the DE
Stephen Hanson <Stephen.Hanson@Sun.COM>
parents:
9120
diff
changeset
|
125 * Add class and version after the nvlist_merge() just in case |
e0258b956de2
6849551 Many duplicated ereports are delivered to the DE
Stephen Hanson <Stephen.Hanson@Sun.COM>
parents:
9120
diff
changeset
|
126 * the sysevent has an attribute called class or version. |
e0258b956de2
6849551 Many duplicated ereports are delivered to the DE
Stephen Hanson <Stephen.Hanson@Sun.COM>
parents:
9120
diff
changeset
|
127 */ |
e0258b956de2
6849551 Many duplicated ereports are delivered to the DE
Stephen Hanson <Stephen.Hanson@Sun.COM>
parents:
9120
diff
changeset
|
128 (void) nvlist_add_string(nvl, FM_CLASS, fullclass); |
e0258b956de2
6849551 Many duplicated ereports are delivered to the DE
Stephen Hanson <Stephen.Hanson@Sun.COM>
parents:
9120
diff
changeset
|
129 (void) nvlist_add_uint8(nvl, FM_VERSION, FM_RSRC_VERSION); |
e0258b956de2
6849551 Many duplicated ereports are delivered to the DE
Stephen Hanson <Stephen.Hanson@Sun.COM>
parents:
9120
diff
changeset
|
130 |
e0258b956de2
6849551 Many duplicated ereports are delivered to the DE
Stephen Hanson <Stephen.Hanson@Sun.COM>
parents:
9120
diff
changeset
|
131 /* |
12979
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
132 * Dispatch the event. Because we have used sysevent_bind_xhandle |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
133 * the delivery thread is blessed as a proper fmd thread so |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
134 * we may use regular fmd api calls. |
7171 | 135 */ |
136 sysevent_get_time(sep, &hrt); | |
12979
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
137 fmd_xprt_post(sysev_hdl, sysev_xprt, nvl, hrt); |
7171 | 138 } |
2914
266e6e5b5218
6446415 sysevent-transport publishes out-of-sequence ereports after panic
stephh
parents:
1414
diff
changeset
|
139 |
0 | 140 /* |
1193 | 141 * Receive an event from the SysEvent channel and post it to our transport. |
0 | 142 * Under extreme low-memory situations where we cannot event unpack the event, |
1193 | 143 * we can request that SysEvent redeliver the event later by returning EAGAIN. |
144 * If we do this too many times, the kernel will drop the event. Rather than | |
145 * keeping state per-event, we simply attempt a garbage-collect, hoping that | |
146 * enough free memory will be available by the time the event is redelivered. | |
0 | 147 */ |
148 static int | |
1193 | 149 sysev_recv(sysevent_t *sep, void *arg) |
0 | 150 { |
151 uint64_t seq = sysevent_get_seq(sep); | |
1193 | 152 fmd_xprt_t *xp = arg; |
0 | 153 nvlist_t *nvl; |
154 hrtime_t hrt; | |
6081
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
155 int rc = 0; |
0 | 156 |
6081
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
157 (void) pthread_mutex_lock(&sysev_mutex); |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
158 if (sysev_exiting == 1) { |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
159 while (sysev_xprt_refcnt > 0) |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
160 (void) pthread_cond_wait(&sysev_cv, &sysev_mutex); |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
161 (void) pthread_mutex_unlock(&sysev_mutex); |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
162 return (EAGAIN); |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
163 } |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
164 sysev_xprt_refcnt++; |
2914
266e6e5b5218
6446415 sysevent-transport publishes out-of-sequence ereports after panic
stephh
parents:
1414
diff
changeset
|
165 while (sysev_replay_wait) |
6081
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
166 (void) pthread_cond_wait(&sysev_cv, &sysev_mutex); |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
167 (void) pthread_mutex_unlock(&sysev_mutex); |
2914
266e6e5b5218
6446415 sysevent-transport publishes out-of-sequence ereports after panic
stephh
parents:
1414
diff
changeset
|
168 |
0 | 169 if (strcmp(sysevent_get_class_name(sep), EC_FM) != 0) { |
1193 | 170 fmd_hdl_error(sysev_hdl, "discarding event 0x%llx: unexpected" |
0 | 171 " transport class %s\n", seq, sysevent_get_class_name(sep)); |
1193 | 172 sysev_stats.bad_class.fmds_value.ui64++; |
6081
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
173 } else if (sysevent_get_attr_list(sep, &nvl) != 0) { |
1193 | 174 if (errno == EAGAIN || errno == ENOMEM) { |
175 fmd_modhash_tryapply(fmd.d_mod_hash, fmd_module_trygc); | |
176 fmd_scheme_hash_trygc(fmd.d_schemes); | |
177 sysev_stats.eagain.fmds_value.ui64++; | |
6081
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
178 rc = EAGAIN; |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
179 } else { |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
180 fmd_hdl_error(sysev_hdl, "discarding event 0x%llx: " |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
181 "missing or invalid payload", seq); |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
182 sysev_stats.bad_attr.fmds_value.ui64++; |
1193 | 183 } |
6081
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
184 } else { |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
185 sysevent_get_time(sep, &hrt); |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
186 fmd_xprt_post(sysev_hdl, xp, nvl, hrt); |
0 | 187 } |
188 | |
6081
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
189 (void) pthread_mutex_lock(&sysev_mutex); |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
190 if (--sysev_xprt_refcnt == 0 && sysev_exiting == 1) |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
191 (void) pthread_cond_broadcast(&sysev_cv); |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
192 (void) pthread_mutex_unlock(&sysev_mutex); |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
193 |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
194 return (rc); |
0 | 195 } |
196 | |
/*
 * Checksum used by the dump transport to verify the content of error reports
 * saved on the dump device (a copy of the kernel's checksum32()).  For each
 * of the 'length' bytes at 'cp_arg', the running 32-bit sum is rotated right
 * by one bit and the byte value is then added to it.
 */
static uint32_t
sysev_checksum(void *cp_arg, size_t length)
{
	const unsigned char *bytes = cp_arg;
	uint32_t sum = 0;
	size_t i;

	for (i = 0; i < length; i++) {
		sum = (sum >> 1) | (sum << 31);	/* rotate right one bit */
		sum += bytes[i];
	}

	return (sum);
}
212 | |
1193 | 213 /* |
214 * Replay saved events from the dump transport. This function is installed as | |
215 * the timer callback and is called only once during the module's lifetime. | |
216 */ | |
217 /*ARGSUSED*/ | |
218 static void | |
219 sysev_replay(fmd_hdl_t *hdl, id_t id, void *arg) | |
0 | 220 { |
1193 | 221 char *dumpdev; |
0 | 222 off64_t off, off0; |
223 int fd, err; | |
224 | |
225 /* | |
226 * Determine the appropriate dump device to use for replaying pending | |
1193 | 227 * error reports. If the device property is NULL (default), we |
0 | 228 * open and query /dev/dump to determine the current dump device. |
229 */ | |
1193 | 230 if ((dumpdev = sysev_device) == NULL) { |
0 | 231 if ((fd = open("/dev/dump", O_RDONLY)) == -1) { |
1193 | 232 fmd_hdl_error(hdl, "failed to open /dev/dump " |
0 | 233 "to locate dump device for event replay"); |
2914
266e6e5b5218
6446415 sysevent-transport publishes out-of-sequence ereports after panic
stephh
parents:
1414
diff
changeset
|
234 goto done; |
0 | 235 } |
236 | |
237 dumpdev = alloca(PATH_MAX); | |
238 err = ioctl(fd, DIOCGETDEV, dumpdev); | |
239 (void) close(fd); | |
240 | |
241 if (err == -1) { | |
242 if (errno != ENODEV) { | |
1193 | 243 fmd_hdl_error(hdl, "failed to obtain " |
0 | 244 "path to dump device for event replay"); |
245 } | |
2914
266e6e5b5218
6446415 sysevent-transport publishes out-of-sequence ereports after panic
stephh
parents:
1414
diff
changeset
|
246 goto done; |
0 | 247 } |
248 } | |
249 | |
250 if (strcmp(dumpdev, "/dev/null") == 0) | |
2914
266e6e5b5218
6446415 sysevent-transport publishes out-of-sequence ereports after panic
stephh
parents:
1414
diff
changeset
|
251 goto done; /* return silently and skip replay for /dev/null */ |
0 | 252 |
253 /* | |
254 * Open the appropriate device and then determine the offset of the | |
255 * start of the ereport dump region located at the end of the device. | |
256 */ | |
257 if ((fd = open64(dumpdev, O_RDWR | O_DSYNC)) == -1) { | |
1193 | 258 fmd_hdl_error(hdl, "failed to open dump transport %s " |
0 | 259 "(pending events will not be replayed)", dumpdev); |
2914
266e6e5b5218
6446415 sysevent-transport publishes out-of-sequence ereports after panic
stephh
parents:
1414
diff
changeset
|
260 goto done; |
0 | 261 } |
262 | |
263 off = DUMP_OFFSET + DUMP_LOGSIZE + DUMP_ERPTSIZE; | |
264 off = off0 = lseek64(fd, -off, SEEK_END) & -DUMP_OFFSET; | |
265 | |
266 if (off == (off64_t)-1LL) { | |
1193 | 267 fmd_hdl_error(hdl, "failed to seek dump transport %s " |
0 | 268 "(pending events will not be replayed)", dumpdev); |
269 (void) close(fd); | |
2914
266e6e5b5218
6446415 sysevent-transport publishes out-of-sequence ereports after panic
stephh
parents:
1414
diff
changeset
|
270 goto done; |
0 | 271 } |
272 | |
273 /* | |
274 * The ereport dump region is a sequence of erpt_dump_t headers each of | |
275 * which is followed by packed nvlist data. We iterate over them in | |
276 * order, unpacking and dispatching each one to our dispatch queue. | |
277 */ | |
278 for (;;) { | |
279 char nvbuf[ERPT_DATA_SZ]; | |
280 uint32_t chksum; | |
281 erpt_dump_t ed; | |
282 nvlist_t *nvl; | |
283 | |
284 fmd_timeval_t ftv, tod; | |
285 hrtime_t hrt; | |
286 uint64_t ena; | |
287 | |
288 if (pread64(fd, &ed, sizeof (ed), off) != sizeof (ed)) { | |
1193 | 289 fmd_hdl_error(hdl, "failed to read from dump " |
0 | 290 "transport %s (pending events lost)", dumpdev); |
291 break; | |
292 } | |
293 | |
294 if (ed.ed_magic == 0 && ed.ed_size == 0) | |
295 break; /* end of list: all zero */ | |
296 | |
297 if (ed.ed_magic == 0) { | |
298 off += sizeof (ed) + ed.ed_size; | |
299 continue; /* continue searching */ | |
300 } | |
301 | |
302 if (ed.ed_magic != ERPT_MAGIC) { | |
303 /* | |
304 * Stop reading silently if the first record has the | |
305 * wrong magic number; this likely indicates that we | |
306 * rebooted from non-FMA bits or paged over the dump. | |
307 */ | |
308 if (off == off0) | |
309 break; | |
310 | |
1193 | 311 fmd_hdl_error(hdl, "invalid dump transport " |
0 | 312 "record at %llx (magic number %x, expected %x)\n", |
313 (u_longlong_t)off, ed.ed_magic, ERPT_MAGIC); | |
314 break; | |
315 } | |
316 | |
317 if (ed.ed_size > ERPT_DATA_SZ) { | |
1193 | 318 fmd_hdl_error(hdl, "invalid dump transport " |
0 | 319 "record at %llx size (%u exceeds limit)\n", |
320 (u_longlong_t)off, ed.ed_size); | |
321 break; | |
322 } | |
323 | |
324 if (pread64(fd, nvbuf, ed.ed_size, | |
325 off + sizeof (ed)) != ed.ed_size) { | |
1193 | 326 fmd_hdl_error(hdl, "failed to read dump " |
0 | 327 "transport event (offset %llx)", (u_longlong_t)off); |
328 | |
1193 | 329 sysev_stats.dump_lost.fmds_value.ui64++; |
0 | 330 goto next; |
331 } | |
332 | |
1193 | 333 if ((chksum = sysev_checksum(nvbuf, |
0 | 334 ed.ed_size)) != ed.ed_chksum) { |
1193 | 335 fmd_hdl_error(hdl, "dump transport event at " |
0 | 336 "offset %llx is corrupt (checksum %x != %x)\n", |
337 (u_longlong_t)off, chksum, ed.ed_chksum); | |
338 | |
1193 | 339 sysev_stats.dump_lost.fmds_value.ui64++; |
0 | 340 goto next; |
341 } | |
342 | |
343 if ((err = nvlist_xunpack(nvbuf, | |
344 ed.ed_size, &nvl, &fmd.d_nva)) != 0) { | |
1193 | 345 fmd_hdl_error(hdl, "failed to unpack dump " |
0 | 346 "transport event at offset %llx: %s\n", |
347 (u_longlong_t)off, fmd_strerror(err)); | |
348 | |
1193 | 349 sysev_stats.dump_lost.fmds_value.ui64++; |
0 | 350 goto next; |
351 } | |
352 | |
353 /* | |
354 * If ed_hrt_nsec is set it contains the gethrtime() value from | |
355 * when the event was originally enqueued for the transport. | |
356 * If it is zero, we use the weaker bound ed_hrt_base instead. | |
357 */ | |
358 if (ed.ed_hrt_nsec != 0) | |
359 hrt = ed.ed_hrt_nsec; | |
360 else | |
361 hrt = ed.ed_hrt_base; | |
362 | |
363 /* | |
364 * If this is an FMA protocol event of class "ereport.*" that | |
365 * contains valid ENA, we can improve the precision of 'hrt'. | |
366 */ | |
1193 | 367 if (nvlist_lookup_uint64(nvl, FM_EREPORT_ENA, &ena) == 0) |
0 | 368 hrt = fmd_time_ena2hrt(hrt, ena); |
369 | |
370 /* | |
371 * Now convert 'hrt' to an adjustable TOD based on the values | |
372 * in ed_tod_base which correspond to one another and are | |
373 * sampled before reboot using the old gethrtime() clock. | |
374 * fmd_event_recreate() will use this TOD value to re-assign | |
375 * the event an updated gethrtime() value based on the current | |
376 * value of the non-adjustable gethrtime() clock. Phew. | |
377 */ | |
378 tod.ftv_sec = ed.ed_tod_base.sec; | |
379 tod.ftv_nsec = ed.ed_tod_base.nsec; | |
380 fmd_time_hrt2tod(ed.ed_hrt_base, &tod, hrt, &ftv); | |
381 | |
1193 | 382 (void) nvlist_remove_all(nvl, FMD_EVN_TOD); |
383 (void) nvlist_add_uint64_array(nvl, | |
384 FMD_EVN_TOD, (uint64_t *)&ftv, 2); | |
0 | 385 |
1193 | 386 fmd_xprt_post(hdl, sysev_xprt, nvl, 0); |
387 sysev_stats.dump_replay.fmds_value.ui64++; | |
0 | 388 |
389 next: | |
390 /* | |
391 * Reset the magic number for the event record to zero so that | |
392 * we do not replay the same event multiple times. | |
393 */ | |
394 ed.ed_magic = 0; | |
395 | |
396 if (pwrite64(fd, &ed, sizeof (ed), off) != sizeof (ed)) { | |
1193 | 397 fmd_hdl_error(hdl, "failed to mark dump " |
0 | 398 "transport event (offset %llx)", (u_longlong_t)off); |
399 } | |
400 | |
401 off += sizeof (ed) + ed.ed_size; | |
402 } | |
403 | |
404 (void) close(fd); | |
2914
266e6e5b5218
6446415 sysevent-transport publishes out-of-sequence ereports after panic
stephh
parents:
1414
diff
changeset
|
405 done: |
6081
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
406 (void) pthread_mutex_lock(&sysev_mutex); |
2914
266e6e5b5218
6446415 sysevent-transport publishes out-of-sequence ereports after panic
stephh
parents:
1414
diff
changeset
|
407 sysev_replay_wait = 0; |
6081
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
408 (void) pthread_cond_broadcast(&sysev_cv); |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
409 (void) pthread_mutex_unlock(&sysev_mutex); |
0 | 410 } |
1193 | 411 |
412 static const fmd_prop_t sysev_props[] = { | |
413 { "class", FMD_TYPE_STRING, EC_ALL }, /* event class */ | |
414 { "device", FMD_TYPE_STRING, NULL }, /* replay device */ | |
415 { "channel", FMD_TYPE_STRING, FM_ERROR_CHAN }, /* channel name */ | |
416 { "sid", FMD_TYPE_STRING, "fmd" }, /* subscriber id */ | |
417 { NULL, 0, NULL } | |
418 }; | |
419 | |
420 static const fmd_hdl_ops_t sysev_ops = { | |
421 NULL, /* fmdo_recv */ | |
422 sysev_replay, /* fmdo_timeout */ | |
423 NULL, /* fmdo_close */ | |
424 NULL, /* fmdo_stats */ | |
425 NULL, /* fmdo_gc */ | |
426 NULL, /* fmdo_send */ | |
427 }; | |
428 | |
429 static const fmd_hdl_info_t sysev_info = { | |
430 "SysEvent Transport Agent", "1.0", &sysev_ops, sysev_props | |
431 }; | |
432 | |
433 /* | |
434 * Bind to the sysevent channel we use for listening for error events and then | |
7171 | 435 * subscribe to appropriate events received over this channel. Setup the |
436 * legacy sysevent handler for creating sysevent resources and forwarding DR | |
437 * events. | |
1193 | 438 */ |
439 void | |
440 sysev_init(fmd_hdl_t *hdl) | |
441 { | |
442 uint_t flags; | |
7171 | 443 const char *subclasses[] = { EC_SUB_ALL }; |
1193 | 444 |
12979
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
445 /* This builtin is for the global zone only */ |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
446 if (getzoneid() != GLOBAL_ZONEID) |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
447 return; |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
448 |
1193 | 449 if (fmd_hdl_register(hdl, FMD_API_VERSION, &sysev_info) != 0) |
450 return; /* invalid property settings */ | |
451 | |
452 (void) fmd_stat_create(hdl, FMD_STAT_NOALLOC, sizeof (sysev_stats) / | |
453 sizeof (fmd_stat_t), (fmd_stat_t *)&sysev_stats); | |
454 | |
455 sysev_channel = fmd_prop_get_string(hdl, "channel"); | |
456 sysev_class = fmd_prop_get_string(hdl, "class"); | |
457 sysev_device = fmd_prop_get_string(hdl, "device"); | |
458 sysev_sid = fmd_prop_get_string(hdl, "sid"); | |
459 | |
460 if (sysev_channel == NULL) | |
461 fmd_hdl_abort(hdl, "channel property must be defined\n"); | |
462 | |
463 if (sysev_sid == NULL) | |
464 fmd_hdl_abort(hdl, "sid property must be defined\n"); | |
465 | |
466 if ((errno = sysevent_evc_bind(sysev_channel, &sysev_evc, | |
467 EVCH_CREAT | EVCH_HOLD_PEND)) != 0) { | |
468 fmd_hdl_abort(hdl, "failed to bind to event transport " | |
469 "channel %s", sysev_channel); | |
470 } | |
471 | |
9120
fe1f7d8cd967
6533823 need better way of proxying faults across event transport
Stephen Hanson <Stephen.Hanson@Sun.COM>
parents:
7243
diff
changeset
|
472 sysev_xprt = fmd_xprt_open(hdl, FMD_XPRT_RDONLY | |
fe1f7d8cd967
6533823 need better way of proxying faults across event transport
Stephen Hanson <Stephen.Hanson@Sun.COM>
parents:
7243
diff
changeset
|
473 FMD_XPRT_CACHE_AS_LOCAL, NULL, NULL); |
1193 | 474 sysev_hdl = hdl; |
475 | |
476 /* | |
477 * If we're subscribing to the default channel, keep our subscription | |
478 * active even if we die unexpectedly so we continue queuing events. | |
479 * If we're not (e.g. running under fmsim), do not specify SUB_KEEP so | |
480 * that our event channel will be destroyed if we die unpleasantly. | |
481 */ | |
482 if (strcmp(sysev_channel, FM_ERROR_CHAN) == 0) | |
483 flags = EVCH_SUB_KEEP | EVCH_SUB_DUMP; | |
484 else | |
485 flags = EVCH_SUB_DUMP; | |
486 | |
12979
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
487 if ((subattr = sysevent_subattr_alloc()) == NULL) |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
488 fmd_hdl_abort(hdl, "failed to allocate subscription " |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
489 "attributes"); |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
490 |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
491 sysevent_subattr_thrcreate(subattr, fmd_doorthr_create, NULL); |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
492 sysevent_subattr_thrsetup(subattr, fmd_doorthr_setup, NULL); |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
493 |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
494 errno = sysevent_evc_xsubscribe(sysev_evc, |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
495 sysev_sid, sysev_class, sysev_recv, sysev_xprt, flags, subattr); |
1193 | 496 |
497 if (errno != 0) { | |
498 if (errno == EEXIST) { | |
499 fmd_hdl_abort(hdl, "another fault management daemon is " | |
500 "active on transport channel %s\n", sysev_channel); | |
501 } else { | |
12979
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
502 fmd_hdl_abort(hdl, "failed to xsubscribe to %s on " |
1193 | 503 "transport channel %s", sysev_class, sysev_channel); |
504 } | |
505 } | |
506 | |
507 /* | |
508 * Once the transport is open, install a single timer to fire at once | |
509 * in the context of the module's thread to run sysev_replay(). This | |
510 * thread will block in its first fmd_xprt_post() until fmd is ready. | |
511 */ | |
512 fmd_hdl_debug(hdl, "transport '%s' open\n", sysev_channel); | |
513 (void) fmd_timer_install(hdl, NULL, NULL, 0); | |
7171 | 514 |
515 /* | |
516 * Open the legacy sysevent handle and subscribe to all events. These | |
517 * are automatically converted to "resource.sysevent.*" events so that | |
518 * modules can manage these events without additional infrastructure. | |
519 */ | |
520 if (geteuid() != 0) | |
521 return; | |
522 | |
523 if ((fmd.d_sysev_hdl = | |
12979
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
524 sysevent_bind_xhandle(sysev_legacy, subattr)) == NULL) |
7171 | 525 fmd_hdl_abort(hdl, "failed to bind to legacy sysevent channel"); |
526 | |
527 if (sysevent_subscribe_event(fmd.d_sysev_hdl, EC_ALL, | |
528 subclasses, 1) != 0) | |
529 fmd_hdl_abort(hdl, "failed to subscribe to legacy sysevents"); | |
1193 | 530 } |
531 | |
532 /* | |
533 * Close the channel by unsubscribing and unbinding. We only do this when | |
534 * a non-default channel has been selected. If we're using FM_ERROR_CHAN, | |
535 * the system default, we do *not* want to unsubscribe because the kernel will | |
536 * remove the subscriber queue and any events published in our absence will | |
537 * therefore be lost. This scenario may occur when, for example, fmd is sent | |
538 * a SIGTERM by init(1M) during reboot but an error is detected and makes it | |
539 * into the sysevent channel queue before init(1M) manages to call uadmin(2). | |
540 */ | |
541 void | |
542 sysev_fini(fmd_hdl_t *hdl) | |
543 { | |
544 if (strcmp(sysev_channel, FM_ERROR_CHAN) != 0) { | |
11102
b91faef0c984
PSARC/2009/554 door_xcreate - extended door creation interface for private doors
Gavin Maltby <Gavin.Maltby@Sun.COM>
parents:
9967
diff
changeset
|
545 (void) sysevent_evc_unsubscribe(sysev_evc, sysev_sid); |
b91faef0c984
PSARC/2009/554 door_xcreate - extended door creation interface for private doors
Gavin Maltby <Gavin.Maltby@Sun.COM>
parents:
9967
diff
changeset
|
546 (void) sysevent_evc_unbind(sysev_evc); |
1193 | 547 } |
548 | |
7171 | 549 if (fmd.d_sysev_hdl != NULL) |
550 sysevent_unbind_handle(fmd.d_sysev_hdl); | |
551 | |
12979
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
552 if (subattr != NULL) { |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
553 sysevent_subattr_free(subattr); |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
554 subattr = NULL; |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
555 } |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11102
diff
changeset
|
556 |
6081
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
557 if (sysev_xprt != NULL) { |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
558 /* |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
559 * Wait for the callback to return before destroying the transport. |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
560 */ |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
561 (void) pthread_mutex_lock(&sysev_mutex); |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
562 sysev_exiting = 1; |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
563 while (sysev_xprt_refcnt > 0) |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
564 (void) pthread_cond_wait(&sysev_cv, &sysev_mutex); |
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
565 (void) pthread_mutex_unlock(&sysev_mutex); |
1193 | 566 fmd_xprt_close(hdl, sysev_xprt); |
6081
e21b9c494333
6641803 fmd deadlock in fmd_case_hash_lookup and fmd_case_rele
cy152378
parents:
2914
diff
changeset
|
567 } |
1193 | 568 |
569 fmd_prop_free_string(hdl, sysev_class); | |
570 fmd_prop_free_string(hdl, sysev_channel); | |
571 fmd_prop_free_string(hdl, sysev_device); | |
572 fmd_prop_free_string(hdl, sysev_sid); | |
573 } |