Mercurial > illumos > illumos-gate
comparison usr/src/lib/librestart/common/librestart.c @ 12979:ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
PSARC/2009/618 snmp-notify: SNMP Notification Daemon for Software Events
PSARC/2009/619 smtp-notify: Email Notification Daemon for Software Events
PSARC/2010/225 fmd for non-global Solaris zones
PSARC/2010/226 Solaris Instance UUID
PSARC/2010/227 nvlist_nvflag(3NVPAIR)
PSARC/2010/228 libfmevent additions
PSARC/2010/257 sysevent_evc_setpropnvl and sysevent_evc_getpropnvl
PSARC/2010/265 FMRI and FMA Event Stability, 'ireport' category 1 event class, and the 'sw' FMRI scheme
PSARC/2010/278 FMA/SMF integration: instance state transitions
PSARC/2010/279 Modelling panics within FMA
PSARC/2010/290 logadm.conf upgrade
6392476 fmdump needs to pretty-print
6393375 userland ereport/ireport event generation interfaces
6445732 Add email notification agent for FMA and software events
6804168 RFE: Allow an efficient means to monitor SMF services status changes
6866661 scf_values_destroy(3SCF) will segfault if is passed NULL
6884709 Add snmp notification agent for FMA and software events
6884712 Add private interface to tap into libfmd_msg macro expansion capabilities
6897919 fmd to run in a non-global zone
6897937 fmd use of non-private doors is not safe
6900081 add a UUID to Solaris kernel image for use in crashdump identification
6914884 model panic events as a defect diagnosis in FMA
6944862 fmd_case_open_uuid, fmd_case_uuisresolved, fmd_nvl_create_defect
6944866 log legacy sysevents in fmd
6944867 enumerate svc scheme in topo
6944868 software-diagnosis and software-response fmd modules
6944870 model SMF maintenance state as a defect diagnosis in FMA
6944876 savecore runs in foreground for systems with zfs root and dedicated dump
6965796 Implement notification parameters for SMF state transitions and FMA events
6968287 SUN-FM-MIB.mib needs to be updated to reflect Oracle information
6972331 logadm.conf upgrade PSARC/2010/290
author | Gavin Maltby <gavin.maltby@oracle.com> |
---|---|
date | Fri, 30 Jul 2010 17:04:17 +1000 |
parents | 2522fef20c5f |
children | 33bb96bf701a |
comparison
equal
deleted
inserted
replaced
12978:19d842faf8e4 | 12979:ab9ae749152f |
---|---|
18 * | 18 * |
19 * CDDL HEADER END | 19 * CDDL HEADER END |
20 */ | 20 */ |
21 | 21 |
22 /* | 22 /* |
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. | 23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. |
24 * Use is subject to license terms. | |
25 */ | 24 */ |
26 | 25 |
26 #include <libintl.h> | |
27 #include <librestart.h> | 27 #include <librestart.h> |
28 #include <librestart_priv.h> | 28 #include <librestart_priv.h> |
29 #include <libscf.h> | 29 #include <libscf.h> |
30 #include <libscf_priv.h> | 30 #include <libscf_priv.h> |
31 | 31 |
106 restarter_instance_state_t re_state; | 106 restarter_instance_state_t re_state; |
107 restarter_instance_state_t re_next_state; | 107 restarter_instance_state_t re_next_state; |
108 }; | 108 }; |
109 | 109 |
110 /* | 110 /* |
111 * Long reasons must all parse/read correctly in the following contexts: | |
112 * | |
113 * "A service instance transitioned state: %s." | |
114 * "A service failed: %s." | |
115 * "Reason: %s." | |
116 * "The service transitioned state (%s) and ..." | |
117 * | |
118 * With the exception of restart_str_none they must also fit the following | |
119 * moulds: | |
120 * | |
121 * "An instance transitioned because %s, and ..." | |
122 * "An instance transitioned to <new-state> because %s, and ..." | |
123 * | |
124 * Note that whoever is rendering the long message must provide the | |
125 * terminal punctuation - don't include it here. Similarly, do not | |
126 * provide an initial capital letter in reason-long. | |
127 * | |
128 * The long reason strings are Volatile - within the grammatical constraints | |
129 * above we may improve them as need be. The intention is that a consumer | |
130 * may blindly render the string along the lines of the above examples, | |
131 * but has no other guarantees as to the exact wording. Long reasons | |
132 * are localized. | |
133 * | |
134 * We define revisions of the set of short reason strings in use. Within | |
135 * a given revision, all short reasons are Committed. Consumers must check | |
136 * the revision in use before relying on the semantics of the short reason | |
137 * codes - if the version exceeds that which they are familiar with they should | |
138 * fail gracefully. Having checked for version compatibility, a consumer | |
139 * is assured that | |
140 * | |
141 * "short_reason_A iff semantic_A", provided: | |
142 * | |
143 * . the restarter uses this short reason code at all, | |
144 * . the short reason is not "none" (which a restarter could | |
145 * specify for any transition semantics) | |
146 * | |
147 * To split/refine such a Committed semantic_A into further cases, | |
148 * we are required to bump the revision number. This should be an | |
149 * infrequent occurrence. If you bump the revision number you may | |
150 * need to make corresponding changes in any source that calls | |
151 * restarter_str_version (e.g., FMA event generation). | |
152 * | |
153 * To add additional reasons to the set you must also bump the version | |
154 * number. | |
155 */ | |
156 | |
157 /* | |
158 * The following describes revision 0 of the set of transition reasons. | |
159 * Read the preceding block comment before making any changes. | |
160 */ | |
161 static const struct restarter_state_transition_reason restarter_str[] = { | |
162 /* | |
163 * Any transition for which the restarter has not provided a reason. | |
164 */ | |
165 { | |
166 restarter_str_none, | |
167 "none", | |
168 "the restarter gave no reason" | |
169 }, | |
170 | |
171 /* | |
172 * A transition to maintenance state due to a | |
173 * 'svcadm mark maintenance <fmri>'. *Not* used if the libscf | |
174 * interface smf_maintain_instance(3SCF) is used to request maintenance. | |
175 */ | |
176 { | |
177 restarter_str_administrative_request, | |
178 "administrative_request", | |
179 "maintenance was requested by an administrator" | |
180 }, | |
181 | |
182 /* | |
183 * A transition to maintenance state if a repository inconsistency | |
184 * exists when the service/instance state is first read by startd | |
185 * into the graph engine (this can also happen during startd restart). | |
186 */ | |
187 { | |
188 restarter_str_bad_repo_state, | |
189 "bad_repo_state", | |
190 "an SMF repository inconsistecy exists" | |
191 }, | |
192 | |
193 /* | |
194 * A transition 'maintenance -> uninitialized' resulting always | |
195 * from 'svcadm clear <fmri>'. *Not* used if the libscf interface | |
196 * smf_restore_instance(3SCF) is used. | |
197 */ | |
198 { | |
199 restarter_str_clear_request, | |
200 "clear_request", | |
201 "maintenance clear was requested by an administrator" | |
202 }, | |
203 | |
204 /* | |
205 * A transition 'online -> offline' due to a process core dump. | |
206 */ | |
207 { | |
208 restarter_str_ct_ev_core, | |
209 "ct_ev_core", | |
210 "a process dumped core" | |
211 }, | |
212 | |
213 /* | |
214 * A transition 'online -> offline' due to an empty process contract, | |
215 * i.e., the last process in a contract type service has exited. | |
216 */ | |
217 { | |
218 restarter_str_ct_ev_exit, | |
219 "ct_ev_exit", | |
220 "all processes in the service have exited" | |
221 }, | |
222 | |
223 /* | |
224 * A transition 'online -> offline' due to a hardware error. | |
225 */ | |
226 { | |
227 restarter_str_ct_ev_hwerr, | |
228 "ct_ev_hwerr", | |
229 "a process was killed due to uncorrectable hardware error" | |
230 }, | |
231 | |
232 /* | |
233 * A transition 'online -> offline' due to a process in the service | |
234 * having received a fatal signal originating from outside the | |
235 * service process contract. | |
236 */ | |
237 { | |
238 restarter_str_ct_ev_signal, | |
239 "ct_ev_signal", | |
240 "a process received a fatal signal from outside the service" | |
241 }, | |
242 | |
243 /* | |
244 * A transition 'offline -> online' when all dependencies for the | |
245 * service have been met. | |
246 */ | |
247 { | |
248 restarter_str_dependencies_satisfied, | |
249 "dependencies_satisfied", | |
250 "all dependencies have been satisfied" | |
251 }, | |
252 | |
253 /* | |
254 * A transition 'online -> offline' because some dependency for the | |
255 * service is no-longer met. | |
256 */ | |
257 { | |
258 restarter_str_dependency_activity, | |
259 "dependency_activity", | |
260 "a dependency activity required a stop" | |
261 }, | |
262 | |
263 /* | |
264 * A transition to maintenance state due to a cycle in the | |
265 * service dependencies. | |
266 */ | |
267 { | |
268 restarter_str_dependency_cycle, | |
269 "dependency_cycle", | |
270 "a dependency cycle exists" | |
271 }, | |
272 | |
273 /* | |
274 * A transition 'online -> offline -> disabled' due to a | |
275 * 'svcadm disable [-t] <fmri>' or smf_disable_instance(3SCF) call. | |
276 */ | |
277 { | |
278 restarter_str_disable_request, | |
279 "disable_request", | |
280 "a disable was requested" | |
281 }, | |
282 | |
283 /* | |
284 * A transition 'disabled -> offline' due to a | |
285 * 'svcadm enable [-t] <fmri>' or smf_enable_instance(3SCF) call. | |
286 */ | |
287 { | |
288 restarter_str_enable_request, | |
289 "enable_request", | |
290 "an enable was requested" | |
291 }, | |
292 | |
293 /* | |
294 * A transition to maintenance state when a method fails | |
295 * repeatedly for a retryable reason. | |
296 */ | |
297 { | |
298 restarter_str_fault_threshold_reached, | |
299 "fault_threshold_reached", | |
300 "a method is failing in a retryable manner but too often" | |
301 }, | |
302 | |
303 /* | |
304 * A transition to uninitialized state when startd reads the service | |
305 * configuration and inserts it into the graph engine. | |
306 */ | |
307 { | |
308 restarter_str_insert_in_graph, | |
309 "insert_in_graph", | |
310 "the instance was inserted in the graph" | |
311 }, | |
312 | |
313 /* | |
314 * A transition to maintenance state due to an invalid dependency | |
315 * declared for the service. | |
316 */ | |
317 { | |
318 restarter_str_invalid_dependency, | |
319 "invalid_dependency", | |
320 "a service has an invalid dependency" | |
321 }, | |
322 | |
323 /* | |
324 * A transition to maintenance state because the service-declared | |
325 * restarter is invalid. | |
326 */ | |
327 { | |
328 restarter_str_invalid_restarter, | |
329 "invalid_restarter", | |
330 "the service restarter is invalid" | |
331 }, | |
332 | |
333 /* | |
334 * A transition to maintenance state because a restarter method | |
335 * exited with one of SMF_EXIT_ERR_CONFIG, SMF_EXIT_ERR_NOSMF, | |
336 * SMF_EXIT_ERR_PERM, or SMF_EXIT_ERR_FATAL. | |
337 */ | |
338 { | |
339 restarter_str_method_failed, | |
340 "method_failed", | |
341 "a start, stop or refresh method failed" | |
342 }, | |
343 | |
344 /* | |
345 * A transition 'uninitialized -> {disabled|offline}' after | |
346 * "insert_in_graph" to match the state configured in the | |
347 * repository. | |
348 */ | |
349 { | |
350 restarter_str_per_configuration, | |
351 "per_configuration", | |
352 "the SMF repository configuration specifies this state" | |
353 }, | |
354 | |
355 /* | |
356 * Refresh requested - no state change. | |
357 */ | |
358 { | |
359 restarter_str_refresh, | |
360 NULL, | |
361 "a refresh was requested (no change of state)" | |
362 }, | |
363 | |
364 /* | |
365 * A transition 'online -> offline -> online' due to a | |
366 * 'svcadm restart <fmri>' or equivalent libscf API call. | |
367 * Both the 'online -> offline' and 'offline -> online' transitions | |
368 * specify this reason. | |
369 */ | |
370 { | |
371 restarter_str_restart_request, | |
372 "restart_request", | |
373 "a restart was requested" | |
374 }, | |
375 | |
376 /* | |
377 * A transition to maintenance state because the start method is | |
378 * being executed successfully but too frequently. | |
379 */ | |
380 { | |
381 restarter_str_restarting_too_quickly, | |
382 "restarting_too_quickly", | |
383 "the instance is restarting too quickly" | |
384 }, | |
385 | |
386 /* | |
387 * A transition to maintenance state due to a service requesting | |
388 * 'svcadm mark maintenance <fmri>' or equivalent libscf API call. | |
389 * A command line 'svcadm mark maintenance <fmri>' does not produce | |
390 * this reason - it produces administrative_request instead. | |
391 */ | |
392 { | |
393 restarter_str_service_request, | |
394 "service_request", | |
395 "maintenance was requested by another service" | |
396 }, | |
397 | |
398 /* | |
399 * An instance inserted into the graph at its existing state | |
400 * during a startd restart - no state change. | |
401 */ | |
402 { | |
403 restarter_str_startd_restart, | |
404 NULL, | |
405 "the instance was inserted in the graph due to startd restart" | |
406 } | |
407 }; | |
408 | |
409 uint32_t | |
410 restarter_str_version(void) | |
411 { | |
412 return (RESTARTER_STRING_VERSION); | |
413 } | |
414 | |
415 const char * | |
416 restarter_get_str_short(restarter_str_t key) | |
417 { | |
418 int i; | |
419 for (i = 0; i < sizeof (restarter_str) / | |
420 sizeof (struct restarter_state_transition_reason); i++) | |
421 if (key == restarter_str[i].str_key) | |
422 return (restarter_str[i].str_short); | |
423 return (NULL); | |
424 } | |
425 | |
426 const char * | |
427 restarter_get_str_long(restarter_str_t key) | |
428 { | |
429 int i; | |
430 for (i = 0; i < sizeof (restarter_str) / | |
431 sizeof (struct restarter_state_transition_reason); i++) | |
432 if (key == restarter_str[i].str_key) | |
433 return (dgettext(TEXT_DOMAIN, | |
434 restarter_str[i].str_long)); | |
435 return (NULL); | |
436 } | |
437 | |
438 /* | |
111 * A static no memory error message mc_error_t structure | 439 * A static no memory error message mc_error_t structure |
112 * to be used in cases when memory errors are to be returned | 440 * to be used in cases when memory errors are to be returned |
113 * This avoids the need to attempt to allocate memory for the | 441 * This avoids the need to attempt to allocate memory for the |
114 * message, therefore getting into a cycle of no memory failures. | 442 * message, therefore getting into a cycle of no memory failures. |
115 */ | 443 */ |
493 | 821 |
494 /* | 822 /* |
495 * Commit the state, next state, and auxiliary state into the repository. | 823 * Commit the state, next state, and auxiliary state into the repository. |
496 * Let the graph engine know about the state change and error. On success, | 824 * Let the graph engine know about the state change and error. On success, |
497 * return 0. On error, return | 825 * return 0. On error, return |
498 * EINVAL - aux has spaces | |
499 * - inst is invalid or not an instance FMRI | |
500 * EPROTO - librestart compiled against different libscf | 826 * EPROTO - librestart compiled against different libscf |
501 * ENOMEM - out of memory | 827 * ENOMEM - out of memory |
502 * - repository server out of resources | 828 * - repository server out of resources |
503 * ENOTACTIVE - repository server not running | 829 * ENOTACTIVE - repository server not running |
504 * ECONNABORTED - repository connection established, but then broken | 830 * ECONNABORTED - repository connection established, but then broken |
515 restarter_set_states(restarter_event_handle_t *h, const char *inst, | 841 restarter_set_states(restarter_event_handle_t *h, const char *inst, |
516 restarter_instance_state_t cur_state, | 842 restarter_instance_state_t cur_state, |
517 restarter_instance_state_t new_cur_state, | 843 restarter_instance_state_t new_cur_state, |
518 restarter_instance_state_t next_state, | 844 restarter_instance_state_t next_state, |
519 restarter_instance_state_t new_next_state, restarter_error_t e, | 845 restarter_instance_state_t new_next_state, restarter_error_t e, |
520 const char *aux) | 846 restarter_str_t aux) |
521 { | 847 { |
522 nvlist_t *attr; | 848 nvlist_t *attr; |
523 scf_handle_t *scf_h; | 849 scf_handle_t *scf_h; |
524 instance_data_t id; | 850 instance_data_t id; |
525 int ret = 0; | 851 int ret = 0; |
526 char *p = (char *)aux; | 852 const char *p = restarter_get_str_short(aux); |
527 | 853 |
528 assert(h->reh_master_channel != NULL); | 854 assert(h->reh_master_channel != NULL); |
529 assert(h->reh_master_channel_name != NULL); | 855 assert(h->reh_master_channel_name != NULL); |
530 assert(h->reh_master_subscriber_id != NULL); | 856 assert(h->reh_master_subscriber_id != NULL); |
531 | |
532 /* Validate format of auxiliary state: no spaces allowed */ | |
533 if (p != NULL) { | |
534 while (*p != '\0') { | |
535 if (isspace(*p)) | |
536 return (EINVAL); | |
537 p++; | |
538 } | |
539 } | |
540 | 857 |
541 if ((scf_h = scf_handle_create(SCF_VERSION)) == NULL) { | 858 if ((scf_h = scf_handle_create(SCF_VERSION)) == NULL) { |
542 switch (scf_error()) { | 859 switch (scf_error()) { |
543 case SCF_ERROR_VERSION_MISMATCH: | 860 case SCF_ERROR_VERSION_MISMATCH: |
544 return (EPROTO); | 861 return (EPROTO); |
570 if (nvlist_alloc(&attr, NV_UNIQUE_NAME, 0) != 0 || | 887 if (nvlist_alloc(&attr, NV_UNIQUE_NAME, 0) != 0 || |
571 nvlist_add_int32(attr, RESTARTER_NAME_STATE, new_cur_state) != 0 || | 888 nvlist_add_int32(attr, RESTARTER_NAME_STATE, new_cur_state) != 0 || |
572 nvlist_add_int32(attr, RESTARTER_NAME_NEXT_STATE, new_next_state) | 889 nvlist_add_int32(attr, RESTARTER_NAME_NEXT_STATE, new_next_state) |
573 != 0 || | 890 != 0 || |
574 nvlist_add_int32(attr, RESTARTER_NAME_ERROR, e) != 0 || | 891 nvlist_add_int32(attr, RESTARTER_NAME_ERROR, e) != 0 || |
575 nvlist_add_string(attr, RESTARTER_NAME_INSTANCE, inst) != 0) { | 892 nvlist_add_string(attr, RESTARTER_NAME_INSTANCE, inst) != 0 || |
893 nvlist_add_int32(attr, RESTARTER_NAME_REASON, aux) != 0) { | |
576 ret = ENOMEM; | 894 ret = ENOMEM; |
577 } else { | 895 } else { |
578 id.i_fmri = inst; | 896 id.i_fmri = inst; |
579 id.i_state = cur_state; | 897 id.i_state = cur_state; |
580 id.i_next_state = next_state; | 898 id.i_next_state = next_state; |
581 | 899 |
582 ret = _restarter_commit_states(scf_h, &id, new_cur_state, | 900 ret = _restarter_commit_states(scf_h, &id, new_cur_state, |
583 new_next_state, aux); | 901 new_next_state, p); |
584 | 902 |
585 if (ret == 0) { | 903 if (ret == 0) { |
586 ret = restarter_event_publish_retry( | 904 ret = restarter_event_publish_retry( |
587 h->reh_master_channel, "master", "state_change", | 905 h->reh_master_channel, "master", "state_change", |
588 "com.sun", "librestart", attr, EVCH_NOSLEEP); | 906 "com.sun", "librestart", attr, EVCH_NOSLEEP); |