Mercurial > illumos > illumos-gate
annotate usr/src/cmd/svc/startd/method.c @ 9263:48d14e1f550f
6411391 Empty method_contexts don't work
5093847 importing a manifest should create method_context properties even if not set
6517270 partial method context definitions should work
6348085 Spurious "Ignoring duplicate environment variable" message
6302173 startd dies after interaction with inetd / inetd service
6221667 method context errors should be logged in the service's instance log
6764671 Array overrun in librestart
author | Sean Wilcox <Sean.Wilcox@Sun.COM> |
---|---|
date | Thu, 02 Apr 2009 13:52:34 -0700 |
parents | e4ff744ddfe1 |
children | 2522fef20c5f |
rev | line source |
---|---|
0 | 1 /* |
2 * CDDL HEADER START | |
3 * | |
4 * The contents of this file are subject to the terms of the | |
1712 | 5 * Common Development and Distribution License (the "License"). |
6 * You may not use this file except in compliance with the License. | |
0 | 7 * |
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 * or http://www.opensolaris.org/os/licensing. | |
10 * See the License for the specific language governing permissions | |
11 * and limitations under the License. | |
12 * | |
13 * When distributing Covered Code, include this CDDL HEADER in each | |
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 * If applicable, add the following below this CDDL HEADER, with the | |
16 * fields enclosed by brackets "[]" replaced with your own identifying | |
17 * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 * | |
19 * CDDL HEADER END | |
20 */ | |
9263
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
21 |
0 | 22 /* |
8944
e4ff744ddfe1
6805730 some simple changes would make 'init 5' much faster
Dan Price <dp@eng.sun.com>
parents:
6748
diff
changeset
|
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. |
0 | 24 * Use is subject to license terms. |
25 */ | |
26 | |
27 /* | |
28 * method.c - method execution functions | |
29 * | |
30 * This file contains the routines needed to run a method: a fork(2)-exec(2) | |
31 * invocation monitored using either the contract filesystem or waitpid(2). | |
32 * (Plain fork1(2) support is provided in fork.c.) | |
33 * | |
34 * Contract Transfer | |
35 * When we restart a service, we want to transfer any contracts that the old | |
36 * service's contract inherited. This means that (a) we must not abandon the | |
37 * old contract when the service dies and (b) we must write the id of the old | |
38 * contract into the terms of the new contract. There should be limits to | |
39 * (a), though, since we don't want to keep the contract around forever. To | |
40 * this end we'll say that services in the offline state may have a contract | |
41 * to be transferred and services in the disabled or maintenance states cannot. | |
42 * This means that when a service transitions from online (or degraded) to | |
43 * offline, the contract should be preserved, and when the service transitions | |
44 * from offline to online (i.e., the start method), we'll transfer inherited | |
45 * contracts. | |
46 */ | |
47 | |
48 #include <sys/contract/process.h> | |
49 #include <sys/ctfs.h> | |
50 #include <sys/stat.h> | |
51 #include <sys/time.h> | |
52 #include <sys/types.h> | |
53 #include <sys/uio.h> | |
54 #include <sys/wait.h> | |
55 #include <alloca.h> | |
56 #include <assert.h> | |
57 #include <errno.h> | |
58 #include <fcntl.h> | |
59 #include <libcontract.h> | |
60 #include <libcontract_priv.h> | |
61 #include <libgen.h> | |
62 #include <librestart.h> | |
63 #include <libscf.h> | |
64 #include <limits.h> | |
65 #include <port.h> | |
66 #include <sac.h> | |
67 #include <signal.h> | |
68 #include <stdlib.h> | |
69 #include <string.h> | |
70 #include <strings.h> | |
71 #include <unistd.h> | |
4244
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
72 #include <atomic.h> |
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
73 #include <poll.h> |
0 | 74 |
75 #include "startd.h" | |
76 | |
77 #define SBIN_SH "/sbin/sh" | |
78 | |
/*
 * Non-zero while a contract id is in the process of being stored into the
 * svc.startd internal hash table.  method_run() bumps this counter with
 * atomic_add_16() before forking a method and drops it again if the fork
 * fails.
 */
volatile uint16_t storing_contract = 0;
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
84 |
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
85 /* |
0 | 86 * Mapping from restart_on method-type to contract events. Must correspond to |
87 * enum method_restart_t. | |
88 */ | |
89 static uint_t method_events[] = { | |
90 /* METHOD_RESTART_ALL */ | |
91 CT_PR_EV_HWERR | CT_PR_EV_SIGNAL | CT_PR_EV_CORE | CT_PR_EV_EMPTY, | |
92 /* METHOD_RESTART_EXTERNAL_FAULT */ | |
93 CT_PR_EV_HWERR | CT_PR_EV_SIGNAL, | |
94 /* METHOD_RESTART_ANY_FAULT */ | |
95 CT_PR_EV_HWERR | CT_PR_EV_SIGNAL | CT_PR_EV_CORE | |
96 }; | |
97 | |
98 /* | |
99 * method_record_start(restarter_inst_t *) | |
100 * Record a service start for rate limiting. Place the current time | |
101 * in the circular array of instance starts. | |
102 */ | |
103 static void | |
104 method_record_start(restarter_inst_t *inst) | |
105 { | |
106 int index = inst->ri_start_index++ % RINST_START_TIMES; | |
107 | |
108 inst->ri_start_time[index] = gethrtime(); | |
109 } | |
110 | |
111 /* | |
112 * method_rate_critical(restarter_inst_t *) | |
113 * Return true if the average start interval is less than the permitted | |
114 * interval. Implicit success if insufficient measurements for an | |
115 * average exist. | |
116 */ | |
117 static int | |
118 method_rate_critical(restarter_inst_t *inst) | |
119 { | |
120 uint_t n = inst->ri_start_index; | |
121 hrtime_t avg_ns = 0; | |
122 | |
123 if (inst->ri_start_index < RINST_START_TIMES) | |
124 return (0); | |
125 | |
126 avg_ns = | |
127 (inst->ri_start_time[(n - 1) % RINST_START_TIMES] - | |
128 inst->ri_start_time[n % RINST_START_TIMES]) / | |
129 (RINST_START_TIMES - 1); | |
130 | |
131 return (avg_ns < RINST_FAILURE_RATE_NS); | |
132 } | |
133 | |
134 /* | |
135 * int method_is_transient() | |
136 * Determine if the method for the given instance is transient, | |
137 * from a contract perspective. Return 1 if it is, and 0 if it isn't. | |
138 */ | |
139 static int | |
140 method_is_transient(restarter_inst_t *inst, int type) | |
141 { | |
142 if (instance_is_transient_style(inst) || type != METHOD_START) | |
143 return (1); | |
144 else | |
145 return (0); | |
146 } | |
147 | |
148 /* | |
149 * void method_store_contract() | |
150 * Store the newly created contract id into local structures and | |
151 * the repository. If the repository connection is broken it is rebound. | |
152 */ | |
153 static void | |
154 method_store_contract(restarter_inst_t *inst, int type, ctid_t *cid) | |
155 { | |
156 int r; | |
157 boolean_t primary; | |
158 | |
159 if (errno = contract_latest(cid)) | |
160 uu_die("%s: Couldn't get new contract's id", inst->ri_i.i_fmri); | |
161 | |
162 primary = !method_is_transient(inst, type); | |
163 | |
164 if (!primary) { | |
165 if (inst->ri_i.i_transient_ctid != 0) { | |
166 log_framework(LOG_INFO, | |
167 "%s: transient ctid expected to be 0 but " | |
168 "was set to %ld\n", inst->ri_i.i_fmri, | |
169 inst->ri_i.i_transient_ctid); | |
170 } | |
171 | |
172 inst->ri_i.i_transient_ctid = *cid; | |
173 } else { | |
174 if (inst->ri_i.i_primary_ctid != 0) { | |
175 /* | |
176 * There was an old contract that we transferred. | |
177 * Remove it. | |
178 */ | |
179 method_remove_contract(inst, B_TRUE, B_FALSE); | |
180 } | |
181 | |
182 if (inst->ri_i.i_primary_ctid != 0) { | |
183 log_framework(LOG_INFO, | |
184 "%s: primary ctid expected to be 0 but " | |
185 "was set to %ld\n", inst->ri_i.i_fmri, | |
186 inst->ri_i.i_primary_ctid); | |
187 } | |
188 | |
189 inst->ri_i.i_primary_ctid = *cid; | |
190 inst->ri_i.i_primary_ctid_stopped = 0; | |
191 | |
4244
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
192 log_framework(LOG_DEBUG, "Storing primary contract %ld for " |
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
193 "%s.\n", *cid, inst->ri_i.i_fmri); |
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
194 |
0 | 195 contract_hash_store(*cid, inst->ri_id); |
196 } | |
197 | |
198 again: | |
199 if (inst->ri_mi_deleted) | |
200 return; | |
201 | |
202 r = restarter_store_contract(inst->ri_m_inst, *cid, primary ? | |
203 RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT); | |
204 switch (r) { | |
205 case 0: | |
206 break; | |
207 | |
208 case ECANCELED: | |
209 inst->ri_mi_deleted = B_TRUE; | |
210 break; | |
211 | |
212 case ECONNABORTED: | |
213 libscf_handle_rebind(scf_instance_handle(inst->ri_m_inst)); | |
214 /* FALLTHROUGH */ | |
215 | |
216 case EBADF: | |
217 libscf_reget_instance(inst); | |
218 goto again; | |
219 | |
220 case ENOMEM: | |
221 case EPERM: | |
222 case EACCES: | |
223 case EROFS: | |
224 uu_die("%s: Couldn't store contract id %ld", | |
225 inst->ri_i.i_fmri, *cid); | |
226 /* NOTREACHED */ | |
227 | |
228 case EINVAL: | |
229 default: | |
230 bad_error("restarter_store_contract", r); | |
231 } | |
232 } | |
233 | |
234 /* | |
235 * void method_remove_contract() | |
236 * Remove any non-permanent contracts from internal structures and | |
237 * the repository, then abandon them. | |
238 * Returns | |
239 * 0 - success | |
240 * ECANCELED - inst was deleted from the repository | |
241 * | |
242 * If the repository connection was broken, it is rebound. | |
243 */ | |
244 void | |
245 method_remove_contract(restarter_inst_t *inst, boolean_t primary, | |
246 boolean_t abandon) | |
247 { | |
248 ctid_t * const ctidp = primary ? &inst->ri_i.i_primary_ctid : | |
249 &inst->ri_i.i_transient_ctid; | |
250 | |
251 int r; | |
252 | |
253 assert(*ctidp != 0); | |
254 | |
255 log_framework(LOG_DEBUG, "Removing %s contract %lu for %s.\n", | |
256 primary ? "primary" : "transient", *ctidp, inst->ri_i.i_fmri); | |
257 | |
258 if (abandon) | |
259 contract_abandon(*ctidp); | |
260 | |
261 again: | |
262 if (inst->ri_mi_deleted) { | |
263 r = ECANCELED; | |
264 goto out; | |
265 } | |
266 | |
267 r = restarter_remove_contract(inst->ri_m_inst, *ctidp, primary ? | |
268 RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT); | |
269 switch (r) { | |
270 case 0: | |
271 break; | |
272 | |
273 case ECANCELED: | |
274 inst->ri_mi_deleted = B_TRUE; | |
275 break; | |
276 | |
277 case ECONNABORTED: | |
278 libscf_handle_rebind(scf_instance_handle(inst->ri_m_inst)); | |
279 /* FALLTHROUGH */ | |
280 | |
281 case EBADF: | |
282 libscf_reget_instance(inst); | |
283 goto again; | |
284 | |
285 case ENOMEM: | |
286 case EPERM: | |
287 case EACCES: | |
288 case EROFS: | |
289 log_error(LOG_INFO, "%s: Couldn't remove contract id %ld: " | |
290 "%s.\n", inst->ri_i.i_fmri, *ctidp, strerror(r)); | |
291 break; | |
292 | |
293 case EINVAL: | |
294 default: | |
295 bad_error("restarter_remove_contract", r); | |
296 } | |
297 | |
298 out: | |
299 if (primary) | |
300 contract_hash_remove(*ctidp); | |
301 | |
302 *ctidp = 0; | |
303 } | |
304 | |
/*
 * Printable names of the method types, indexed by method type.  method_run()
 * asserts that the type lies in the range 0..2 before indexing this table.
 * The table is read-only, so both the strings and the pointer array are
 * const.
 */
static const char * const method_names[] = { "start", "stop", "refresh" };
306 | |
0 | 307 /* |
308 * int method_ready_contract(restarter_inst_t *, int, method_restart_t, int) | |
309 * | |
310 * Activate a contract template for the type method of inst. type, | |
311 * restart_on, and cte_mask dictate the critical events term of the contract. | |
312 * Returns | |
313 * 0 - success | |
314 * ECANCELED - inst has been deleted from the repository | |
315 */ | |
316 static int | |
317 method_ready_contract(restarter_inst_t *inst, int type, | |
318 method_restart_t restart_on, uint_t cte_mask) | |
319 { | |
320 int tmpl, err, istrans, iswait, ret; | |
321 uint_t cevents, fevents; | |
322 | |
323 /* | |
324 * Correctly supporting wait-style services is tricky without | |
325 * rearchitecting startd to cope with multiple event sources | |
326 * simultaneously trying to stop an instance. Until a better | |
327 * solution is implemented, we avoid this problem for | |
328 * wait-style services by making contract events fatal and | |
329 * letting the wait code alone handle stopping the service. | |
330 */ | |
331 iswait = instance_is_wait_style(inst); | |
332 istrans = method_is_transient(inst, type); | |
333 | |
334 tmpl = open64(CTFS_ROOT "/process/template", O_RDWR); | |
335 if (tmpl == -1) | |
336 uu_die("Could not create contract template"); | |
337 | |
338 /* | |
339 * We assume non-login processes are unlikely to create | |
340 * multiple process groups, and set CT_PR_PGRPONLY for all | |
341 * wait-style services' contracts. | |
342 */ | |
343 err = ct_pr_tmpl_set_param(tmpl, CT_PR_INHERIT | CT_PR_REGENT | | |
344 (iswait ? CT_PR_PGRPONLY : 0)); | |
345 assert(err == 0); | |
346 | |
347 if (istrans) { | |
348 cevents = 0; | |
349 fevents = 0; | |
350 } else { | |
351 assert(restart_on >= 0); | |
352 assert(restart_on <= METHOD_RESTART_ANY_FAULT); | |
353 cevents = method_events[restart_on] & ~cte_mask; | |
354 fevents = iswait ? | |
355 (method_events[restart_on] & ~cte_mask & CT_PR_ALLFATAL) : | |
356 0; | |
357 } | |
358 | |
359 err = ct_tmpl_set_critical(tmpl, cevents); | |
360 assert(err == 0); | |
361 | |
362 err = ct_tmpl_set_informative(tmpl, 0); | |
363 assert(err == 0); | |
364 err = ct_pr_tmpl_set_fatal(tmpl, fevents); | |
365 assert(err == 0); | |
366 | |
367 err = ct_tmpl_set_cookie(tmpl, istrans ? METHOD_OTHER_COOKIE : | |
368 METHOD_START_COOKIE); | |
369 assert(err == 0); | |
370 | |
371 if (type == METHOD_START && inst->ri_i.i_primary_ctid != 0) { | |
372 ret = ct_pr_tmpl_set_transfer(tmpl, inst->ri_i.i_primary_ctid); | |
373 switch (ret) { | |
374 case 0: | |
375 break; | |
376 | |
377 case ENOTEMPTY: | |
378 /* No contracts for you! */ | |
379 method_remove_contract(inst, B_TRUE, B_TRUE); | |
380 if (inst->ri_mi_deleted) { | |
381 ret = ECANCELED; | |
382 goto out; | |
383 } | |
384 break; | |
385 | |
386 case EINVAL: | |
387 case ESRCH: | |
388 case EACCES: | |
389 default: | |
390 bad_error("ct_pr_tmpl_set_transfer", ret); | |
391 } | |
392 } | |
393 | |
6073 | 394 err = ct_pr_tmpl_set_svc_fmri(tmpl, inst->ri_i.i_fmri); |
395 assert(err == 0); | |
396 err = ct_pr_tmpl_set_svc_aux(tmpl, method_names[type]); | |
397 assert(err == 0); | |
398 | |
0 | 399 err = ct_tmpl_activate(tmpl); |
400 assert(err == 0); | |
401 | |
402 ret = 0; | |
403 | |
404 out: | |
405 err = close(tmpl); | |
406 assert(err == 0); | |
407 | |
408 return (ret); | |
409 } | |
410 | |
411 static void | |
412 exec_method(const restarter_inst_t *inst, int type, const char *method, | |
413 struct method_context *mcp, uint8_t need_session) | |
414 { | |
415 char *cmd; | |
416 const char *errf; | |
417 char **nenv; | |
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
418 int rsmc_errno = 0; |
0 | 419 |
420 cmd = uu_msprintf("exec %s", method); | |
421 | |
422 if (inst->ri_utmpx_prefix[0] != '\0' && inst->ri_utmpx_prefix != NULL) | |
423 (void) utmpx_mark_init(getpid(), inst->ri_utmpx_prefix); | |
424 | |
425 setlog(inst->ri_logstem); | |
5238
37127716670e
Contributed by Mark Martin <storycrafter@gmail.com>.
lianep
parents:
5040
diff
changeset
|
426 log_instance(inst, B_FALSE, "Executing %s method (\"%s\").", |
0 | 427 method_names[type], method); |
428 | |
429 if (need_session) | |
430 (void) setpgrp(); | |
431 | |
432 /* Set credentials. */ | |
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
433 rsmc_errno = restarter_set_method_context(mcp, &errf); |
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
434 if (rsmc_errno != 0) { |
9263
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
435 log_instance(inst, B_FALSE, |
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
436 "svc.startd could not set context for method: "); |
0 | 437 |
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
438 if (rsmc_errno == -1) { |
0 | 439 if (strcmp(errf, "core_set_process_path") == 0) { |
9263
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
440 log_instance(inst, B_FALSE, |
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
441 "Could not set corefile path."); |
0 | 442 } else if (strcmp(errf, "setproject") == 0) { |
9263
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
443 log_instance(inst, B_FALSE, "%s: a resource " |
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
444 "control assignment failed", errf); |
0 | 445 } else if (strcmp(errf, "pool_set_binding") == 0) { |
9263
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
446 log_instance(inst, B_FALSE, "%s: a system " |
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
447 "error occurred", errf); |
0 | 448 } else { |
449 #ifndef NDEBUG | |
450 uu_warn("%s:%d: Bad function name \"%s\" for " | |
451 "error %d from " | |
452 "restarter_set_method_context().\n", | |
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
453 __FILE__, __LINE__, errf, rsmc_errno); |
0 | 454 #endif |
455 abort(); | |
456 } | |
457 | |
458 exit(1); | |
459 } | |
460 | |
461 if (errf != NULL && strcmp(errf, "pool_set_binding") == 0) { | |
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
462 switch (rsmc_errno) { |
0 | 463 case ENOENT: |
9263
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
464 log_instance(inst, B_FALSE, "%s: the pool " |
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
465 "could not be found", errf); |
0 | 466 break; |
467 | |
468 case EBADF: | |
9263
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
469 log_instance(inst, B_FALSE, "%s: the " |
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
470 "configuration is invalid", errf); |
0 | 471 break; |
472 | |
1712 | 473 case EINVAL: |
9263
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
474 log_instance(inst, B_FALSE, "%s: pool name " |
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
475 "\"%s\" is invalid", errf, |
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
476 mcp->resource_pool); |
1712 | 477 break; |
478 | |
0 | 479 default: |
480 #ifndef NDEBUG | |
481 uu_warn("%s:%d: Bad error %d for function %s " | |
482 "in restarter_set_method_context().\n", | |
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
483 __FILE__, __LINE__, rsmc_errno, errf); |
0 | 484 #endif |
485 abort(); | |
486 } | |
487 | |
488 exit(SMF_EXIT_ERR_CONFIG); | |
489 } | |
490 | |
491 if (errf != NULL) { | |
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
492 errno = rsmc_errno; |
0 | 493 perror(errf); |
494 | |
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
495 switch (rsmc_errno) { |
0 | 496 case EINVAL: |
497 case EPERM: | |
498 case ENOENT: | |
499 case ENAMETOOLONG: | |
500 case ERANGE: | |
501 case ESRCH: | |
502 exit(SMF_EXIT_ERR_CONFIG); | |
503 /* NOTREACHED */ | |
504 | |
505 default: | |
506 exit(1); | |
507 } | |
508 } | |
509 | |
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
510 switch (rsmc_errno) { |
0 | 511 case ENOMEM: |
9263
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
512 log_instance(inst, B_FALSE, "Out of memory."); |
0 | 513 exit(1); |
514 /* NOTREACHED */ | |
515 | |
516 case ENOENT: | |
9263
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
517 log_instance(inst, B_FALSE, "Missing passwd entry for " |
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
518 "user."); |
0 | 519 exit(SMF_EXIT_ERR_CONFIG); |
520 /* NOTREACHED */ | |
521 | |
522 default: | |
523 #ifndef NDEBUG | |
524 uu_warn("%s:%d: Bad miscellaneous error %d from " | |
525 "restarter_set_method_context().\n", __FILE__, | |
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
526 __LINE__, rsmc_errno); |
0 | 527 #endif |
528 abort(); | |
529 } | |
530 } | |
531 | |
5040
ff6ebd8761a6
PSARC 2007/177 SMF read-protected property storage
wesolows
parents:
4816
diff
changeset
|
532 nenv = set_smf_env(mcp->env, mcp->env_sz, NULL, inst, |
ff6ebd8761a6
PSARC 2007/177 SMF read-protected property storage
wesolows
parents:
4816
diff
changeset
|
533 method_names[type]); |
0 | 534 |
535 log_preexec(); | |
536 | |
537 (void) execle(SBIN_SH, SBIN_SH, "-c", cmd, NULL, nenv); | |
538 | |
539 exit(10); | |
540 } | |
541 | |
542 static void | |
543 write_status(restarter_inst_t *inst, const char *mname, int stat) | |
544 { | |
545 int r; | |
546 | |
547 again: | |
548 if (inst->ri_mi_deleted) | |
549 return; | |
550 | |
551 r = libscf_write_method_status(inst->ri_m_inst, mname, stat); | |
552 switch (r) { | |
553 case 0: | |
554 break; | |
555 | |
556 case ECONNABORTED: | |
557 libscf_reget_instance(inst); | |
558 goto again; | |
559 | |
560 case ECANCELED: | |
561 inst->ri_mi_deleted = 1; | |
562 break; | |
563 | |
564 case EPERM: | |
565 case EACCES: | |
566 case EROFS: | |
567 log_framework(LOG_INFO, "Could not write exit status " | |
568 "for %s method of %s: %s.\n", mname, | |
569 inst->ri_i.i_fmri, strerror(r)); | |
570 break; | |
571 | |
572 case ENAMETOOLONG: | |
573 default: | |
574 bad_error("libscf_write_method_status", r); | |
575 } | |
576 } | |
577 | |
578 /* | |
579 * int method_run() | |
580 * Execute the type method of instp. If it requires a fork(), wait for it | |
581 * to return and return its exit code in *exit_code. Otherwise set | |
582 * *exit_code to 0 if the method succeeds & -1 if it fails. If the | |
583 * repository connection is broken, it is rebound, but inst may not be | |
584 * reset. | |
585 * Returns | |
586 * 0 - success | |
587 * EINVAL - A correct method or method context couldn't be retrieved. | |
588 * EIO - Contract kill failed. | |
589 * EFAULT - Method couldn't be executed successfully. | |
590 * ELOOP - Retry threshold exceeded. | |
591 * ECANCELED - inst was deleted from the repository before method was run | |
592 * ERANGE - Timeout retry threshold exceeded. | |
593 * EAGAIN - Failed due to external cause, retry. | |
594 */ | |
595 int | |
596 method_run(restarter_inst_t **instp, int type, int *exit_code) | |
597 { | |
598 char *method; | |
599 int ret_status; | |
600 pid_t pid; | |
601 method_restart_t restart_on; | |
602 uint_t cte_mask; | |
603 uint8_t need_session; | |
604 scf_handle_t *h; | |
605 scf_snapshot_t *snap; | |
606 const char *mname; | |
607 const char *errstr; | |
608 struct method_context *mcp; | |
609 int result = 0, timeout_fired = 0; | |
610 int sig, r; | |
611 boolean_t transient; | |
612 uint64_t timeout; | |
613 uint8_t timeout_retry; | |
614 ctid_t ctid; | |
615 int ctfd = -1; | |
616 restarter_inst_t *inst = *instp; | |
617 int id = inst->ri_id; | |
119
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
618 int forkerr; |
0 | 619 |
620 assert(PTHREAD_MUTEX_HELD(&inst->ri_lock)); | |
621 assert(instance_in_transition(inst)); | |
622 | |
623 if (inst->ri_mi_deleted) | |
624 return (ECANCELED); | |
625 | |
626 *exit_code = 0; | |
627 | |
628 assert(0 <= type && type <= 2); | |
629 mname = method_names[type]; | |
630 | |
631 if (type == METHOD_START) | |
632 inst->ri_pre_online_hook(); | |
633 | |
634 h = scf_instance_handle(inst->ri_m_inst); | |
635 | |
636 snap = scf_snapshot_create(h); | |
637 if (snap == NULL || | |
638 scf_instance_get_snapshot(inst->ri_m_inst, "running", snap) != 0) { | |
639 log_framework(LOG_DEBUG, | |
640 "Could not get running snapshot for %s. " | |
641 "Using editing version to run method %s.\n", | |
642 inst->ri_i.i_fmri, mname); | |
643 scf_snapshot_destroy(snap); | |
644 snap = NULL; | |
645 } | |
646 | |
647 /* | |
648 * After this point, we may be logging to the instance log. | |
649 * Make sure we've noted where that log is as a property of | |
650 * the instance. | |
651 */ | |
652 r = libscf_note_method_log(inst->ri_m_inst, st->st_log_prefix, | |
653 inst->ri_logstem); | |
654 if (r != 0) { | |
655 log_framework(LOG_WARNING, | |
656 "%s: couldn't note log location: %s\n", | |
657 inst->ri_i.i_fmri, strerror(r)); | |
658 } | |
659 | |
660 if ((method = libscf_get_method(h, type, inst, snap, &restart_on, | |
661 &cte_mask, &need_session, &timeout, &timeout_retry)) == NULL) { | |
662 if (errno == LIBSCF_PGROUP_ABSENT) { | |
663 log_framework(LOG_DEBUG, | |
664 "%s: instance has no method property group '%s'.\n", | |
665 inst->ri_i.i_fmri, mname); | |
666 if (type == METHOD_REFRESH) | |
667 log_instance(inst, B_TRUE, "No '%s' method " | |
668 "defined. Treating as :true.", mname); | |
669 else | |
670 log_instance(inst, B_TRUE, "Method property " | |
671 "group '%s' is not present.", mname); | |
672 scf_snapshot_destroy(snap); | |
673 return (0); | |
674 } else if (errno == LIBSCF_PROPERTY_ABSENT) { | |
675 log_framework(LOG_DEBUG, | |
676 "%s: instance has no '%s/exec' method property.\n", | |
677 inst->ri_i.i_fmri, mname); | |
678 log_instance(inst, B_TRUE, "Method property '%s/exec " | |
679 "is not present.", mname); | |
680 scf_snapshot_destroy(snap); | |
681 return (0); | |
682 } else { | |
683 log_error(LOG_WARNING, | |
684 "%s: instance libscf_get_method failed\n", | |
685 inst->ri_i.i_fmri); | |
686 scf_snapshot_destroy(snap); | |
687 return (EINVAL); | |
688 } | |
689 } | |
690 | |
691 /* open service contract if stopping a non-transient service */ | |
692 if (type == METHOD_STOP && (!instance_is_transient_style(inst))) { | |
693 if (inst->ri_i.i_primary_ctid == 0) { | |
694 /* service is not running, nothing to stop */ | |
695 log_framework(LOG_DEBUG, "%s: instance has no primary " | |
696 "contract, no service to stop.\n", | |
697 inst->ri_i.i_fmri); | |
698 scf_snapshot_destroy(snap); | |
699 return (0); | |
700 } | |
701 if ((ctfd = contract_open(inst->ri_i.i_primary_ctid, "process", | |
702 "events", O_RDONLY)) < 0) { | |
703 result = EFAULT; | |
704 log_instance(inst, B_TRUE, "Could not open service " | |
5238
37127716670e
Contributed by Mark Martin <storycrafter@gmail.com>.
lianep
parents:
5040
diff
changeset
|
705 "contract %ld. Stop method not run.", |
0 | 706 inst->ri_i.i_primary_ctid); |
707 goto out; | |
708 } | |
709 } | |
710 | |
711 if (restarter_is_null_method(method)) { | |
712 log_framework(LOG_DEBUG, "%s: null method succeeds\n", | |
713 inst->ri_i.i_fmri); | |
714 | |
5238
37127716670e
Contributed by Mark Martin <storycrafter@gmail.com>.
lianep
parents:
5040
diff
changeset
|
715 log_instance(inst, B_TRUE, "Executing %s method (null).", |
37127716670e
Contributed by Mark Martin <storycrafter@gmail.com>.
lianep
parents:
5040
diff
changeset
|
716 mname); |
0 | 717 |
718 if (type == METHOD_START) | |
719 write_status(inst, mname, 0); | |
720 goto out; | |
721 } | |
722 | |
723 sig = restarter_is_kill_method(method); | |
724 if (sig >= 0) { | |
725 | |
726 if (inst->ri_i.i_primary_ctid == 0) { | |
727 log_error(LOG_ERR, "%s: :kill with no contract\n", | |
728 inst->ri_i.i_fmri); | |
5238
37127716670e
Contributed by Mark Martin <storycrafter@gmail.com>.
lianep
parents:
5040
diff
changeset
|
729 log_instance(inst, B_TRUE, "Invalid use of \":kill\" " |
37127716670e
Contributed by Mark Martin <storycrafter@gmail.com>.
lianep
parents:
5040
diff
changeset
|
730 "as stop method for transient service."); |
0 | 731 result = EINVAL; |
732 goto out; | |
733 } | |
734 | |
735 log_framework(LOG_DEBUG, | |
736 "%s: :killing contract with signal %d\n", | |
737 inst->ri_i.i_fmri, sig); | |
738 | |
5238
37127716670e
Contributed by Mark Martin <storycrafter@gmail.com>.
lianep
parents:
5040
diff
changeset
|
739 log_instance(inst, B_TRUE, "Executing %s method (:kill).", |
0 | 740 mname); |
741 | |
742 if (contract_kill(inst->ri_i.i_primary_ctid, sig, | |
743 inst->ri_i.i_fmri) != 0) { | |
744 result = EIO; | |
745 goto out; | |
746 } else | |
747 goto assured_kill; | |
748 } | |
749 | |
750 log_framework(LOG_DEBUG, "%s: forking to run method %s\n", | |
751 inst->ri_i.i_fmri, method); | |
752 | |
753 errstr = restarter_get_method_context(RESTARTER_METHOD_CONTEXT_VERSION, | |
754 inst->ri_m_inst, snap, mname, method, &mcp); | |
755 | |
756 if (errstr != NULL) { | |
9263
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
757 log_instance(inst, B_TRUE, "%s", errstr); |
0 | 758 result = EINVAL; |
759 goto out; | |
760 } | |
761 | |
762 r = method_ready_contract(inst, type, restart_on, cte_mask); | |
763 if (r != 0) { | |
764 assert(r == ECANCELED); | |
765 assert(inst->ri_mi_deleted); | |
766 restarter_free_method_context(mcp); | |
767 result = ECANCELED; | |
768 goto out; | |
769 } | |
770 | |
771 /* | |
772 * Validate safety of method contexts, to save children work. | |
773 */ | |
774 if (!restarter_rm_libs_loadable()) | |
775 log_framework(LOG_DEBUG, "%s: method contexts limited " | |
776 "to root-accessible libraries\n", inst->ri_i.i_fmri); | |
777 | |
778 /* | |
779 * If the service is restarting too quickly, send it to | |
780 * maintenance. | |
781 */ | |
782 if (type == METHOD_START) { | |
783 method_record_start(inst); | |
784 if (method_rate_critical(inst)) { | |
785 log_instance(inst, B_TRUE, "Restarting too quickly, " | |
5238
37127716670e
Contributed by Mark Martin <storycrafter@gmail.com>.
lianep
parents:
5040
diff
changeset
|
786 "changing state to maintenance."); |
0 | 787 result = ELOOP; |
3179
80729b9ca1d6
5079387 _get_auth_policy() doesn't provide corresponding free function
jeanm
parents:
1712
diff
changeset
|
788 restarter_free_method_context(mcp); |
0 | 789 goto out; |
790 } | |
791 } | |
792 | |
4244
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
793 atomic_add_16(&storing_contract, 1); |
119
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
794 pid = startd_fork1(&forkerr); |
0 | 795 if (pid == 0) |
796 exec_method(inst, type, method, mcp, need_session); | |
797 | |
798 if (pid == -1) { | |
4244
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
799 atomic_add_16(&storing_contract, -1); |
119
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
800 if (forkerr == EAGAIN) |
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
801 result = EAGAIN; |
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
802 else |
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
803 result = EFAULT; |
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
804 |
0 | 805 log_error(LOG_WARNING, |
119
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
806 "%s: Couldn't fork to execute method %s: %s\n", |
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
807 inst->ri_i.i_fmri, method, strerror(forkerr)); |
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
808 |
4244
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
809 restarter_free_method_context(mcp); |
0 | 810 goto out; |
811 } | |
812 | |
813 | |
814 /* | |
815 * Get the contract id, decide whether it is primary or transient, and | |
816 * stash it in inst & the repository. | |
817 */ | |
818 method_store_contract(inst, type, &ctid); | |
4244
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
819 atomic_add_16(&storing_contract, -1); |
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
820 |
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
821 restarter_free_method_context(mcp); |
0 | 822 |
823 /* | |
824 * Similarly for the start method PID. | |
825 */ | |
826 if (type == METHOD_START && !inst->ri_mi_deleted) | |
827 (void) libscf_write_start_pid(inst->ri_m_inst, pid); | |
828 | |
829 if (instance_is_wait_style(inst) && type == METHOD_START) { | |
830 /* Wait style instances don't get timeouts on start methods. */ | |
831 if (wait_register(pid, inst->ri_i.i_fmri, 1, 0)) { | |
832 log_error(LOG_WARNING, | |
833 "%s: couldn't register %ld for wait\n", | |
834 inst->ri_i.i_fmri, pid); | |
835 result = EFAULT; | |
836 goto contract_out; | |
837 } | |
838 write_status(inst, mname, 0); | |
839 | |
840 } else { | |
841 int r, err; | |
842 time_t start_time; | |
843 time_t end_time; | |
844 | |
845 /* | |
846 * Because on upgrade/live-upgrade we may have no chance | |
847 * to override faulty timeout values on the way to | |
848 * manifest import, all services on the path to manifest | |
849 * import are treated the same as INFINITE timeout services. | |
850 */ | |
851 | |
852 start_time = time(NULL); | |
853 if (timeout != METHOD_TIMEOUT_INFINITE && !is_timeout_ovr(inst)) | |
854 timeout_insert(inst, ctid, timeout); | |
855 else | |
856 timeout = METHOD_TIMEOUT_INFINITE; | |
857 | |
858 /* Unlock the instance while waiting for the method. */ | |
859 MUTEX_UNLOCK(&inst->ri_lock); | |
860 | |
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
861 do { |
0 | 862 r = waitpid(pid, &ret_status, NULL); |
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
863 } while (r == -1 && errno == EINTR); |
0 | 864 if (r == -1) |
865 err = errno; | |
866 | |
867 /* Re-grab the lock. */ | |
868 inst = inst_lookup_by_id(id); | |
869 | |
870 /* | |
871 * inst can't be removed, as the removal thread waits | |
872 * for completion of this one. | |
873 */ | |
874 assert(inst != NULL); | |
875 *instp = inst; | |
876 | |
877 if (inst->ri_timeout != NULL && inst->ri_timeout->te_fired) | |
878 timeout_fired = 1; | |
879 | |
880 timeout_remove(inst, ctid); | |
881 | |
882 log_framework(LOG_DEBUG, | |
883 "%s method for %s exited with status %d.\n", mname, | |
884 inst->ri_i.i_fmri, WEXITSTATUS(ret_status)); | |
885 | |
886 if (r == -1) { | |
887 log_error(LOG_WARNING, | |
888 "Couldn't waitpid() for %s method of %s (%s).\n", | |
889 mname, inst->ri_i.i_fmri, strerror(err)); | |
890 result = EFAULT; | |
891 goto contract_out; | |
892 } | |
893 | |
894 if (type == METHOD_START) | |
895 write_status(inst, mname, ret_status); | |
896 | |
897 /* return ERANGE if this service doesn't retry on timeout */ | |
898 if (timeout_fired == 1 && timeout_retry == 0) { | |
899 result = ERANGE; | |
900 goto contract_out; | |
901 } | |
902 | |
903 if (!WIFEXITED(ret_status)) { | |
904 /* | |
905 * If method didn't exit itself (it was killed by an | |
906 * external entity, etc.), consider the entire | |
907 * method_run as failed. | |
908 */ | |
909 if (WIFSIGNALED(ret_status)) { | |
910 char buf[SIG2STR_MAX]; | |
911 (void) sig2str(WTERMSIG(ret_status), buf); | |
912 | |
913 log_error(LOG_WARNING, "%s: Method \"%s\" " | |
914 "failed due to signal %s.\n", | |
915 inst->ri_i.i_fmri, method, buf); | |
916 log_instance(inst, B_TRUE, "Method \"%s\" " | |
5238
37127716670e
Contributed by Mark Martin <storycrafter@gmail.com>.
lianep
parents:
5040
diff
changeset
|
917 "failed due to signal %s.", mname, buf); |
0 | 918 } else { |
919 log_error(LOG_WARNING, "%s: Method \"%s\" " | |
920 "failed with exit status %d.\n", | |
921 inst->ri_i.i_fmri, method, | |
922 WEXITSTATUS(ret_status)); | |
923 log_instance(inst, B_TRUE, "Method \"%s\" " | |
5238
37127716670e
Contributed by Mark Martin <storycrafter@gmail.com>.
lianep
parents:
5040
diff
changeset
|
924 "failed with exit status %d.", mname, |
0 | 925 WEXITSTATUS(ret_status)); |
926 } | |
927 result = EAGAIN; | |
928 goto contract_out; | |
929 } | |
930 | |
931 *exit_code = WEXITSTATUS(ret_status); | |
932 if (*exit_code != 0) { | |
933 log_error(LOG_WARNING, | |
934 "%s: Method \"%s\" failed with exit status %d.\n", | |
935 inst->ri_i.i_fmri, method, WEXITSTATUS(ret_status)); | |
936 } | |
937 | |
938 log_instance(inst, B_TRUE, "Method \"%s\" exited with status " | |
5238
37127716670e
Contributed by Mark Martin <storycrafter@gmail.com>.
lianep
parents:
5040
diff
changeset
|
939 "%d.", mname, *exit_code); |
0 | 940 |
941 if (*exit_code != 0) | |
942 goto contract_out; | |
943 | |
944 end_time = time(NULL); | |
945 | |
946 /* Give service contract remaining seconds to empty */ | |
947 if (timeout != METHOD_TIMEOUT_INFINITE) | |
948 timeout -= (end_time - start_time); | |
949 } | |
950 | |
951 assured_kill: | |
952 /* | |
953 * For stop methods, assure that the service contract has emptied | |
954 * before returning. | |
955 */ | |
956 if (type == METHOD_STOP && (!instance_is_transient_style(inst)) && | |
957 !(contract_is_empty(inst->ri_i.i_primary_ctid))) { | |
8944
e4ff744ddfe1
6805730 some simple changes would make 'init 5' much faster
Dan Price <dp@eng.sun.com>
parents:
6748
diff
changeset
|
958 int times = 0; |
0 | 959 |
960 if (timeout != METHOD_TIMEOUT_INFINITE) | |
961 timeout_insert(inst, inst->ri_i.i_primary_ctid, | |
962 timeout); | |
963 | |
964 for (;;) { | |
8944
e4ff744ddfe1
6805730 some simple changes would make 'init 5' much faster
Dan Price <dp@eng.sun.com>
parents:
6748
diff
changeset
|
965 /* |
e4ff744ddfe1
6805730 some simple changes would make 'init 5' much faster
Dan Price <dp@eng.sun.com>
parents:
6748
diff
changeset
|
966 * Check frequently at first, then back off. This |
e4ff744ddfe1
6805730 some simple changes would make 'init 5' much faster
Dan Price <dp@eng.sun.com>
parents:
6748
diff
changeset
|
967 * keeps startd from idling while shutting down. |
e4ff744ddfe1
6805730 some simple changes would make 'init 5' much faster
Dan Price <dp@eng.sun.com>
parents:
6748
diff
changeset
|
968 */ |
e4ff744ddfe1
6805730 some simple changes would make 'init 5' much faster
Dan Price <dp@eng.sun.com>
parents:
6748
diff
changeset
|
969 if (times < 20) { |
e4ff744ddfe1
6805730 some simple changes would make 'init 5' much faster
Dan Price <dp@eng.sun.com>
parents:
6748
diff
changeset
|
970 (void) poll(NULL, 0, 5); |
e4ff744ddfe1
6805730 some simple changes would make 'init 5' much faster
Dan Price <dp@eng.sun.com>
parents:
6748
diff
changeset
|
971 times++; |
e4ff744ddfe1
6805730 some simple changes would make 'init 5' much faster
Dan Price <dp@eng.sun.com>
parents:
6748
diff
changeset
|
972 } else { |
e4ff744ddfe1
6805730 some simple changes would make 'init 5' much faster
Dan Price <dp@eng.sun.com>
parents:
6748
diff
changeset
|
973 (void) poll(NULL, 0, 100); |
e4ff744ddfe1
6805730 some simple changes would make 'init 5' much faster
Dan Price <dp@eng.sun.com>
parents:
6748
diff
changeset
|
974 } |
4244
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
975 if (contract_is_empty(inst->ri_i.i_primary_ctid)) |
0 | 976 break; |
977 } | |
978 | |
979 if (timeout != METHOD_TIMEOUT_INFINITE) | |
980 if (inst->ri_timeout->te_fired) | |
981 result = EFAULT; | |
982 | |
983 timeout_remove(inst, inst->ri_i.i_primary_ctid); | |
984 } | |
985 | |
986 contract_out: | |
987 /* Abandon contracts for transient methods & methods that fail. */ | |
988 transient = method_is_transient(inst, type); | |
989 if ((transient || *exit_code != 0 || result != 0) && | |
990 (restarter_is_kill_method(method) < 0)) | |
991 method_remove_contract(inst, !transient, B_TRUE); | |
992 | |
993 out: | |
994 if (ctfd >= 0) | |
995 (void) close(ctfd); | |
996 scf_snapshot_destroy(snap); | |
997 free(method); | |
998 return (result); | |
999 } | |
1000 | |
1001 /* | |
1002 * The method thread executes a service method to effect a state transition. | |
1003 * The next_state of info->sf_id should be non-_NONE on entrance, and it will | |
1004 * be _NONE on exit (state will either be what next_state was (on success), or | |
1005 * it will be _MAINT (on error)). | |
1006 * | |
1007 * There are six classes of methods to consider: start & other (stop, refresh) | |
1008 * for each of "normal" services, wait services, and transient services. For | |
1009 * each, the method must be fetched from the repository & executed. fork()ed | |
1010 * methods must be waited on, except for the start method of wait services | |
1011 * (which must be registered with the wait subsystem via wait_register()). If | |
1012 * the method succeeded (returned 0), then for start methods its contract | |
1013 * should be recorded as the primary contract for the service. For other | |
1014 * methods, it should be abandoned. If the method fails, then depending on | |
1015 * the failure, either the method should be reexecuted or the service should | |
1016 * be put into maintenance. Either way the contract should be abandoned. | |
1017 */ | |
1018 void * | |
1019 method_thread(void *arg) | |
1020 { | |
1021 fork_info_t *info = arg; | |
1022 restarter_inst_t *inst; | |
1023 scf_handle_t *local_handle; | |
1024 scf_instance_t *s_inst = NULL; | |
1025 int r, exit_code; | |
1026 boolean_t retryable; | |
1027 const char *aux; | |
1028 | |
1029 assert(0 <= info->sf_method_type && info->sf_method_type <= 2); | |
1030 | |
1031 /* Get (and lock) the restarter_inst_t. */ | |
1032 inst = inst_lookup_by_id(info->sf_id); | |
1033 | |
1034 assert(inst->ri_method_thread != 0); | |
1035 assert(instance_in_transition(inst) == 1); | |
1036 | |
1037 /* | |
1038 * We cannot leave this function with inst in transition, because | |
1039 * protocol.c withholds messages for inst otherwise. | |
1040 */ | |
1041 | |
1042 log_framework(LOG_DEBUG, "method_thread() running %s method for %s.\n", | |
1043 method_names[info->sf_method_type], inst->ri_i.i_fmri); | |
1044 | |
1045 local_handle = libscf_handle_create_bound_loop(); | |
1046 | |
1047 rebind_retry: | |
1048 /* get scf_instance_t */ | |
1049 switch (r = libscf_fmri_get_instance(local_handle, inst->ri_i.i_fmri, | |
1050 &s_inst)) { | |
1051 case 0: | |
1052 break; | |
1053 | |
1054 case ECONNABORTED: | |
1055 libscf_handle_rebind(local_handle); | |
1056 goto rebind_retry; | |
1057 | |
1058 case ENOENT: | |
1059 /* | |
1060 * It's not there, but we need to call this so protocol.c | |
1061 * doesn't think it's in transition anymore. | |
1062 */ | |
1063 (void) restarter_instance_update_states(local_handle, inst, | |
1064 inst->ri_i.i_state, RESTARTER_STATE_NONE, RERR_NONE, | |
1065 NULL); | |
1066 goto out; | |
1067 | |
1068 case EINVAL: | |
1069 case ENOTSUP: | |
1070 default: | |
1071 bad_error("libscf_fmri_get_instance", r); | |
1072 } | |
1073 | |
1074 inst->ri_m_inst = s_inst; | |
1075 inst->ri_mi_deleted = B_FALSE; | |
1076 | |
1077 retry: | |
1078 if (info->sf_method_type == METHOD_START) | |
1079 log_transition(inst, START_REQUESTED); | |
1080 | |
1081 r = method_run(&inst, info->sf_method_type, &exit_code); | |
1082 | |
1083 if (r == 0 && exit_code == 0) { | |
1084 /* Success! */ | |
1085 assert(inst->ri_i.i_next_state != RESTARTER_STATE_NONE); | |
1086 | |
1087 /* | |
1088 * When a stop method succeeds, remove the primary contract of | |
1089 * the service, unless we're going to offline, in which case | |
1090 * retain the contract so we can transfer inherited contracts to | |
1091 * the replacement service. | |
1092 */ | |
1093 | |
1094 if (info->sf_method_type == METHOD_STOP && | |
1095 inst->ri_i.i_primary_ctid != 0) { | |
1096 if (inst->ri_i.i_next_state == RESTARTER_STATE_OFFLINE) | |
1097 inst->ri_i.i_primary_ctid_stopped = 1; | |
1098 else | |
1099 method_remove_contract(inst, B_TRUE, B_TRUE); | |
1100 } | |
1101 /* | |
1102 * We don't care whether the handle was rebound because this is | |
1103 * the last thing we do with it. | |
1104 */ | |
1105 (void) restarter_instance_update_states(local_handle, inst, | |
1106 inst->ri_i.i_next_state, RESTARTER_STATE_NONE, | |
1107 info->sf_event_type, NULL); | |
1108 | |
1109 (void) update_fault_count(inst, FAULT_COUNT_RESET); | |
1110 | |
1111 goto out; | |
1112 } | |
1113 | |
1114 /* Failure. Retry or go to maintenance. */ | |
1115 | |
1116 if (r != 0 && r != EAGAIN) { | |
1117 retryable = B_FALSE; | |
1118 } else { | |
1119 switch (exit_code) { | |
1120 case SMF_EXIT_ERR_CONFIG: | |
1121 case SMF_EXIT_ERR_NOSMF: | |
1122 case SMF_EXIT_ERR_PERM: | |
1123 case SMF_EXIT_ERR_FATAL: | |
1124 retryable = B_FALSE; | |
1125 break; | |
1126 | |
1127 default: | |
1128 retryable = B_TRUE; | |
1129 } | |
1130 } | |
1131 | |
1132 if (retryable && update_fault_count(inst, FAULT_COUNT_INCR) != 1) | |
1133 goto retry; | |
1134 | |
1135 /* maintenance */ | |
1136 if (r == ELOOP) | |
1137 log_transition(inst, START_FAILED_REPEATEDLY); | |
1138 else if (r == ERANGE) | |
1139 log_transition(inst, START_FAILED_TIMEOUT_FATAL); | |
1140 else if (exit_code == SMF_EXIT_ERR_CONFIG) | |
1141 log_transition(inst, START_FAILED_CONFIGURATION); | |
1142 else if (exit_code == SMF_EXIT_ERR_FATAL) | |
1143 log_transition(inst, START_FAILED_FATAL); | |
1144 else | |
1145 log_transition(inst, START_FAILED_OTHER); | |
1146 | |
1147 if (r == ELOOP) | |
1148 aux = "restarting_too_quickly"; | |
1149 else if (retryable) | |
1150 aux = "fault_threshold_reached"; | |
1151 else | |
1152 aux = "method_failed"; | |
1153 | |
1154 (void) restarter_instance_update_states(local_handle, inst, | |
1155 RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_FAULT, | |
1156 (char *)aux); | |
1157 | |
1158 if (!method_is_transient(inst, info->sf_method_type) && | |
1159 inst->ri_i.i_primary_ctid != 0) | |
1160 method_remove_contract(inst, B_TRUE, B_TRUE); | |
1161 | |
1162 out: | |
1163 inst->ri_method_thread = 0; | |
6748
6c7adefd809e
6705022 *svc.startd* coredumps in restarter_insert_inst()
rm88369
parents:
6073
diff
changeset
|
1164 |
6c7adefd809e
6705022 *svc.startd* coredumps in restarter_insert_inst()
rm88369
parents:
6073
diff
changeset
|
1165 /* |
6c7adefd809e
6705022 *svc.startd* coredumps in restarter_insert_inst()
rm88369
parents:
6073
diff
changeset
|
1166 * Unlock the mutex after broadcasting to avoid a race condition |
6c7adefd809e
6705022 *svc.startd* coredumps in restarter_insert_inst()
rm88369
parents:
6073
diff
changeset
|
1167 * with restarter_delete_inst() when the 'inst' structure is freed. |
6c7adefd809e
6705022 *svc.startd* coredumps in restarter_insert_inst()
rm88369
parents:
6073
diff
changeset
|
1168 */ |
6c7adefd809e
6705022 *svc.startd* coredumps in restarter_insert_inst()
rm88369
parents:
6073
diff
changeset
|
1169 (void) pthread_cond_broadcast(&inst->ri_method_cv); |
0 | 1170 MUTEX_UNLOCK(&inst->ri_lock); |
1171 | |
1172 scf_instance_destroy(s_inst); | |
1173 scf_handle_destroy(local_handle); | |
1174 startd_free(info, sizeof (fork_info_t)); | |
1175 return (NULL); | |
1176 } |