Mercurial > illumos > illumos-gate
annotate usr/src/cmd/svc/startd/method.c @ 14183:68927c785889 default tip
4099 SMF methods without absolute paths no longer work
Reviewed by: Richard Lowe <richlowe@richlowe.net>
Approved by: Dan McDonald <danmcd@nexenta.com>
author | Jerry Jelinek <jerry.jelinek@joyent.com> |
---|---|
date | Fri, 06 Sep 2013 09:20:56 -0700 |
parents | 3159d5f5b16a |
children |
rev | line source |
---|---|
0 | 1 /* |
2 * CDDL HEADER START | |
3 * | |
4 * The contents of this file are subject to the terms of the | |
1712 | 5 * Common Development and Distribution License (the "License"). |
6 * You may not use this file except in compliance with the License. | |
0 | 7 * |
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 * or http://www.opensolaris.org/os/licensing. | |
10 * See the License for the specific language governing permissions | |
11 * and limitations under the License. | |
12 * | |
13 * When distributing Covered Code, include this CDDL HEADER in each | |
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 * If applicable, add the following below this CDDL HEADER, with the | |
16 * fields enclosed by brackets "[]" replaced with your own identifying | |
17 * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 * | |
19 * CDDL HEADER END | |
20 */ | |
9263
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
21 |
0 | 22 /* |
12979
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11466
diff
changeset
|
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. |
13310
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
24 * Copyright 2011 Joyent Inc. |
0 | 25 */ |
26 | |
27 /* | |
28 * method.c - method execution functions | |
29 * | |
30 * This file contains the routines needed to run a method: a fork(2)-exec(2) | |
31 * invocation monitored using either the contract filesystem or waitpid(2). | |
32 * (Plain fork1(2) support is provided in fork.c.) | |
33 * | |
34 * Contract Transfer | |
35 * When we restart a service, we want to transfer any contracts that the old | |
36 * service's contract inherited. This means that (a) we must not abandon the | |
37 * old contract when the service dies and (b) we must write the id of the old | |
38 * contract into the terms of the new contract. There should be limits to | |
39 * (a), though, since we don't want to keep the contract around forever. To | |
40 * this end we'll say that services in the offline state may have a contract | |
41 * to be transferred and services in the disabled or maintenance states cannot. | |
42 * This means that when a service transitions from online (or degraded) to | |
43 * offline, the contract should be preserved, and when the service transitions | |
44 * from offline to online (i.e., the start method), we'll transfer inherited | |
45 * contracts. | |
46 */ | |
47 | |
48 #include <sys/contract/process.h> | |
49 #include <sys/ctfs.h> | |
50 #include <sys/stat.h> | |
51 #include <sys/time.h> | |
52 #include <sys/types.h> | |
53 #include <sys/uio.h> | |
54 #include <sys/wait.h> | |
55 #include <alloca.h> | |
56 #include <assert.h> | |
57 #include <errno.h> | |
58 #include <fcntl.h> | |
59 #include <libcontract.h> | |
60 #include <libcontract_priv.h> | |
61 #include <libgen.h> | |
62 #include <librestart.h> | |
63 #include <libscf.h> | |
64 #include <limits.h> | |
65 #include <port.h> | |
66 #include <sac.h> | |
67 #include <signal.h> | |
68 #include <stdlib.h> | |
69 #include <string.h> | |
70 #include <strings.h> | |
71 #include <unistd.h> | |
4244
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
72 #include <atomic.h> |
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
73 #include <poll.h> |
13310
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
74 #include <libscf_priv.h> |
0 | 75 |
76 #include "startd.h" | |
77 | |
78 #define SBIN_SH "/sbin/sh" | |
79 | |
/*
 * Consulted to tell whether contracts are currently in the process of
 * being stored into the svc.startd internal hash table.
 */
volatile uint16_t storing_contract = 0;
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
85 |
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
86 /* |
0 | 87 * Mapping from restart_on method-type to contract events. Must correspond to |
88 * enum method_restart_t. | |
89 */ | |
90 static uint_t method_events[] = { | |
91 /* METHOD_RESTART_ALL */ | |
92 CT_PR_EV_HWERR | CT_PR_EV_SIGNAL | CT_PR_EV_CORE | CT_PR_EV_EMPTY, | |
93 /* METHOD_RESTART_EXTERNAL_FAULT */ | |
94 CT_PR_EV_HWERR | CT_PR_EV_SIGNAL, | |
95 /* METHOD_RESTART_ANY_FAULT */ | |
96 CT_PR_EV_HWERR | CT_PR_EV_SIGNAL | CT_PR_EV_CORE | |
97 }; | |
98 | |
99 /* | |
100 * method_record_start(restarter_inst_t *) | |
101 * Record a service start for rate limiting. Place the current time | |
102 * in the circular array of instance starts. | |
103 */ | |
104 static void | |
105 method_record_start(restarter_inst_t *inst) | |
106 { | |
107 int index = inst->ri_start_index++ % RINST_START_TIMES; | |
108 | |
109 inst->ri_start_time[index] = gethrtime(); | |
110 } | |
111 | |
112 /* | |
113 * method_rate_critical(restarter_inst_t *) | |
114 * Return true if the average start interval is less than the permitted | |
13310
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
115 * interval. The implicit interval defaults to RINST_FAILURE_RATE_NS and |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
116 * RINST_START_TIMES but may be overridden with the svc properties |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
117 * startd/critical_failure_count and startd/critical_failure_period |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
118 * which represent the number of failures to consider and the amount of |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
119 * time in seconds in which that number may occur, respectively. Note that |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
120 * this time is measured as of the transition to 'enabled' rather than wall |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
121 * clock time. |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
122 * Implicit success if insufficient measurements for an average exist. |
0 | 123 */ |
14130
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
124 int |
0 | 125 method_rate_critical(restarter_inst_t *inst) |
126 { | |
14130
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
127 hrtime_t critical_failure_period; |
13310
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
128 uint_t critical_failure_count = RINST_START_TIMES; |
0 | 129 uint_t n = inst->ri_start_index; |
130 hrtime_t avg_ns = 0; | |
13310
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
131 uint64_t scf_fr, scf_st; |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
132 scf_propvec_t *prop = NULL; |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
133 scf_propvec_t restart_critical[] = { |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
134 { "critical_failure_period", NULL, SCF_TYPE_INTEGER, NULL, 0 }, |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
135 { "critical_failure_count", NULL, SCF_TYPE_INTEGER, NULL, 0 }, |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
136 { NULL } |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
137 }; |
0 | 138 |
14130
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
139 if (instance_is_wait_style(inst)) |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
140 critical_failure_period = RINST_WT_SVC_FAILURE_RATE_NS; |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
141 else |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
142 critical_failure_period = RINST_FAILURE_RATE_NS; |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
143 |
13310
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
144 restart_critical[0].pv_ptr = &scf_fr; |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
145 restart_critical[1].pv_ptr = &scf_st; |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
146 |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
147 if (scf_read_propvec(inst->ri_i.i_fmri, "startd", |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
148 B_TRUE, restart_critical, &prop) != SCF_FAILED) { |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
149 /* |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
150 * critical_failure_period is expressed |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
151 * in seconds but tracked in ns |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
152 */ |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
153 critical_failure_period = (hrtime_t)scf_fr * NANOSEC; |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
154 critical_failure_count = (uint_t)scf_st; |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
155 } |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
156 if (inst->ri_start_index < critical_failure_count) |
0 | 157 return (0); |
158 | |
159 avg_ns = | |
13310
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
160 (inst->ri_start_time[(n - 1) % critical_failure_count] - |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
161 inst->ri_start_time[n % critical_failure_count]) / |
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
162 (critical_failure_count - 1); |
0 | 163 |
13310
fc9f3d59525e
825 Would like SMF critical restart rate to be configurable
John Sonnenschein <johns@joyent.com>
parents:
12979
diff
changeset
|
164 return (avg_ns < critical_failure_period); |
0 | 165 } |
166 | |
167 /* | |
168 * int method_is_transient() | |
169 * Determine if the method for the given instance is transient, | |
170 * from a contract perspective. Return 1 if it is, and 0 if it isn't. | |
171 */ | |
172 static int | |
173 method_is_transient(restarter_inst_t *inst, int type) | |
174 { | |
175 if (instance_is_transient_style(inst) || type != METHOD_START) | |
176 return (1); | |
177 else | |
178 return (0); | |
179 } | |
180 | |
181 /* | |
182 * void method_store_contract() | |
183 * Store the newly created contract id into local structures and | |
184 * the repository. If the repository connection is broken it is rebound. | |
185 */ | |
186 static void | |
187 method_store_contract(restarter_inst_t *inst, int type, ctid_t *cid) | |
188 { | |
189 int r; | |
190 boolean_t primary; | |
191 | |
192 if (errno = contract_latest(cid)) | |
193 uu_die("%s: Couldn't get new contract's id", inst->ri_i.i_fmri); | |
194 | |
195 primary = !method_is_transient(inst, type); | |
196 | |
197 if (!primary) { | |
198 if (inst->ri_i.i_transient_ctid != 0) { | |
199 log_framework(LOG_INFO, | |
200 "%s: transient ctid expected to be 0 but " | |
201 "was set to %ld\n", inst->ri_i.i_fmri, | |
202 inst->ri_i.i_transient_ctid); | |
203 } | |
204 | |
205 inst->ri_i.i_transient_ctid = *cid; | |
206 } else { | |
207 if (inst->ri_i.i_primary_ctid != 0) { | |
208 /* | |
209 * There was an old contract that we transferred. | |
210 * Remove it. | |
211 */ | |
212 method_remove_contract(inst, B_TRUE, B_FALSE); | |
213 } | |
214 | |
215 if (inst->ri_i.i_primary_ctid != 0) { | |
216 log_framework(LOG_INFO, | |
217 "%s: primary ctid expected to be 0 but " | |
218 "was set to %ld\n", inst->ri_i.i_fmri, | |
219 inst->ri_i.i_primary_ctid); | |
220 } | |
221 | |
222 inst->ri_i.i_primary_ctid = *cid; | |
223 inst->ri_i.i_primary_ctid_stopped = 0; | |
224 | |
4244
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
225 log_framework(LOG_DEBUG, "Storing primary contract %ld for " |
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
226 "%s.\n", *cid, inst->ri_i.i_fmri); |
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
227 |
0 | 228 contract_hash_store(*cid, inst->ri_id); |
229 } | |
230 | |
231 again: | |
232 if (inst->ri_mi_deleted) | |
233 return; | |
234 | |
235 r = restarter_store_contract(inst->ri_m_inst, *cid, primary ? | |
236 RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT); | |
237 switch (r) { | |
238 case 0: | |
239 break; | |
240 | |
241 case ECANCELED: | |
242 inst->ri_mi_deleted = B_TRUE; | |
243 break; | |
244 | |
245 case ECONNABORTED: | |
246 libscf_handle_rebind(scf_instance_handle(inst->ri_m_inst)); | |
247 /* FALLTHROUGH */ | |
248 | |
249 case EBADF: | |
250 libscf_reget_instance(inst); | |
251 goto again; | |
252 | |
253 case ENOMEM: | |
254 case EPERM: | |
255 case EACCES: | |
256 case EROFS: | |
257 uu_die("%s: Couldn't store contract id %ld", | |
258 inst->ri_i.i_fmri, *cid); | |
259 /* NOTREACHED */ | |
260 | |
261 case EINVAL: | |
262 default: | |
263 bad_error("restarter_store_contract", r); | |
264 } | |
265 } | |
266 | |
267 /* | |
268 * void method_remove_contract() | |
269 * Remove any non-permanent contracts from internal structures and | |
270 * the repository, then abandon them. | |
271 * Returns | |
272 * 0 - success | |
273 * ECANCELED - inst was deleted from the repository | |
274 * | |
275 * If the repository connection was broken, it is rebound. | |
276 */ | |
277 void | |
278 method_remove_contract(restarter_inst_t *inst, boolean_t primary, | |
279 boolean_t abandon) | |
280 { | |
281 ctid_t * const ctidp = primary ? &inst->ri_i.i_primary_ctid : | |
282 &inst->ri_i.i_transient_ctid; | |
283 | |
284 int r; | |
285 | |
286 assert(*ctidp != 0); | |
287 | |
288 log_framework(LOG_DEBUG, "Removing %s contract %lu for %s.\n", | |
289 primary ? "primary" : "transient", *ctidp, inst->ri_i.i_fmri); | |
290 | |
291 if (abandon) | |
292 contract_abandon(*ctidp); | |
293 | |
294 again: | |
295 if (inst->ri_mi_deleted) { | |
296 r = ECANCELED; | |
297 goto out; | |
298 } | |
299 | |
300 r = restarter_remove_contract(inst->ri_m_inst, *ctidp, primary ? | |
301 RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT); | |
302 switch (r) { | |
303 case 0: | |
304 break; | |
305 | |
306 case ECANCELED: | |
307 inst->ri_mi_deleted = B_TRUE; | |
308 break; | |
309 | |
310 case ECONNABORTED: | |
311 libscf_handle_rebind(scf_instance_handle(inst->ri_m_inst)); | |
312 /* FALLTHROUGH */ | |
313 | |
314 case EBADF: | |
315 libscf_reget_instance(inst); | |
316 goto again; | |
317 | |
318 case ENOMEM: | |
319 case EPERM: | |
320 case EACCES: | |
321 case EROFS: | |
322 log_error(LOG_INFO, "%s: Couldn't remove contract id %ld: " | |
323 "%s.\n", inst->ri_i.i_fmri, *ctidp, strerror(r)); | |
324 break; | |
325 | |
326 case EINVAL: | |
327 default: | |
328 bad_error("restarter_remove_contract", r); | |
329 } | |
330 | |
331 out: | |
332 if (primary) | |
333 contract_hash_remove(*ctidp); | |
334 | |
335 *ctidp = 0; | |
336 } | |
337 | |
/* Method names, indexed by method type. */
static const char *method_names[] = {
	"start",
	"stop",
	"refresh"
};
339 | |
0 | 340 /* |
341 * int method_ready_contract(restarter_inst_t *, int, method_restart_t, int) | |
342 * | |
343 * Activate a contract template for the type method of inst. type, | |
344 * restart_on, and cte_mask dictate the critical events term of the contract. | |
345 * Returns | |
346 * 0 - success | |
347 * ECANCELED - inst has been deleted from the repository | |
348 */ | |
349 static int | |
350 method_ready_contract(restarter_inst_t *inst, int type, | |
351 method_restart_t restart_on, uint_t cte_mask) | |
352 { | |
353 int tmpl, err, istrans, iswait, ret; | |
354 uint_t cevents, fevents; | |
355 | |
356 /* | |
357 * Correctly supporting wait-style services is tricky without | |
358 * rearchitecting startd to cope with multiple event sources | |
359 * simultaneously trying to stop an instance. Until a better | |
360 * solution is implemented, we avoid this problem for | |
361 * wait-style services by making contract events fatal and | |
362 * letting the wait code alone handle stopping the service. | |
363 */ | |
364 iswait = instance_is_wait_style(inst); | |
365 istrans = method_is_transient(inst, type); | |
366 | |
367 tmpl = open64(CTFS_ROOT "/process/template", O_RDWR); | |
368 if (tmpl == -1) | |
369 uu_die("Could not create contract template"); | |
370 | |
371 /* | |
372 * We assume non-login processes are unlikely to create | |
373 * multiple process groups, and set CT_PR_PGRPONLY for all | |
374 * wait-style services' contracts. | |
375 */ | |
376 err = ct_pr_tmpl_set_param(tmpl, CT_PR_INHERIT | CT_PR_REGENT | | |
377 (iswait ? CT_PR_PGRPONLY : 0)); | |
378 assert(err == 0); | |
379 | |
380 if (istrans) { | |
381 cevents = 0; | |
382 fevents = 0; | |
383 } else { | |
384 assert(restart_on >= 0); | |
385 assert(restart_on <= METHOD_RESTART_ANY_FAULT); | |
386 cevents = method_events[restart_on] & ~cte_mask; | |
387 fevents = iswait ? | |
388 (method_events[restart_on] & ~cte_mask & CT_PR_ALLFATAL) : | |
389 0; | |
390 } | |
391 | |
392 err = ct_tmpl_set_critical(tmpl, cevents); | |
393 assert(err == 0); | |
394 | |
395 err = ct_tmpl_set_informative(tmpl, 0); | |
396 assert(err == 0); | |
397 err = ct_pr_tmpl_set_fatal(tmpl, fevents); | |
398 assert(err == 0); | |
399 | |
400 err = ct_tmpl_set_cookie(tmpl, istrans ? METHOD_OTHER_COOKIE : | |
401 METHOD_START_COOKIE); | |
402 assert(err == 0); | |
403 | |
404 if (type == METHOD_START && inst->ri_i.i_primary_ctid != 0) { | |
405 ret = ct_pr_tmpl_set_transfer(tmpl, inst->ri_i.i_primary_ctid); | |
406 switch (ret) { | |
407 case 0: | |
408 break; | |
409 | |
410 case ENOTEMPTY: | |
411 /* No contracts for you! */ | |
412 method_remove_contract(inst, B_TRUE, B_TRUE); | |
413 if (inst->ri_mi_deleted) { | |
414 ret = ECANCELED; | |
415 goto out; | |
416 } | |
417 break; | |
418 | |
419 case EINVAL: | |
420 case ESRCH: | |
421 case EACCES: | |
422 default: | |
423 bad_error("ct_pr_tmpl_set_transfer", ret); | |
424 } | |
425 } | |
426 | |
6073 | 427 err = ct_pr_tmpl_set_svc_fmri(tmpl, inst->ri_i.i_fmri); |
428 assert(err == 0); | |
429 err = ct_pr_tmpl_set_svc_aux(tmpl, method_names[type]); | |
430 assert(err == 0); | |
431 | |
0 | 432 err = ct_tmpl_activate(tmpl); |
433 assert(err == 0); | |
434 | |
435 ret = 0; | |
436 | |
437 out: | |
438 err = close(tmpl); | |
439 assert(err == 0); | |
440 | |
441 return (ret); | |
442 } | |
443 | |
444 static void | |
445 exec_method(const restarter_inst_t *inst, int type, const char *method, | |
446 struct method_context *mcp, uint8_t need_session) | |
447 { | |
448 char *cmd; | |
449 const char *errf; | |
450 char **nenv; | |
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
451 int rsmc_errno = 0; |
0 | 452 |
453 cmd = uu_msprintf("exec %s", method); | |
454 | |
455 if (inst->ri_utmpx_prefix[0] != '\0' && inst->ri_utmpx_prefix != NULL) | |
456 (void) utmpx_mark_init(getpid(), inst->ri_utmpx_prefix); | |
457 | |
458 setlog(inst->ri_logstem); | |
5238
37127716670e
Contributed by Mark Martin <storycrafter@gmail.com>.
lianep
parents:
5040
diff
changeset
|
459 log_instance(inst, B_FALSE, "Executing %s method (\"%s\").", |
0 | 460 method_names[type], method); |
461 | |
462 if (need_session) | |
463 (void) setpgrp(); | |
464 | |
465 /* Set credentials. */ | |
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
466 rsmc_errno = restarter_set_method_context(mcp, &errf); |
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
467 if (rsmc_errno != 0) { |
9263
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
468 log_instance(inst, B_FALSE, |
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
469 "svc.startd could not set context for method: "); |
0 | 470 |
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
471 if (rsmc_errno == -1) { |
0 | 472 if (strcmp(errf, "core_set_process_path") == 0) { |
9263
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
473 log_instance(inst, B_FALSE, |
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
474 "Could not set corefile path."); |
0 | 475 } else if (strcmp(errf, "setproject") == 0) { |
9263
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
476 log_instance(inst, B_FALSE, "%s: a resource " |
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
477 "control assignment failed", errf); |
0 | 478 } else if (strcmp(errf, "pool_set_binding") == 0) { |
9263
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
479 log_instance(inst, B_FALSE, "%s: a system " |
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
480 "error occurred", errf); |
0 | 481 } else { |
482 #ifndef NDEBUG | |
483 uu_warn("%s:%d: Bad function name \"%s\" for " | |
484 "error %d from " | |
485 "restarter_set_method_context().\n", | |
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
486 __FILE__, __LINE__, errf, rsmc_errno); |
0 | 487 #endif |
488 abort(); | |
489 } | |
490 | |
491 exit(1); | |
492 } | |
493 | |
494 if (errf != NULL && strcmp(errf, "pool_set_binding") == 0) { | |
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
495 switch (rsmc_errno) { |
0 | 496 case ENOENT: |
9263
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
497 log_instance(inst, B_FALSE, "%s: the pool " |
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
498 "could not be found", errf); |
0 | 499 break; |
500 | |
501 case EBADF: | |
9263
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
502 log_instance(inst, B_FALSE, "%s: the " |
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
503 "configuration is invalid", errf); |
0 | 504 break; |
505 | |
1712 | 506 case EINVAL: |
9263
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
507 log_instance(inst, B_FALSE, "%s: pool name " |
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
508 "\"%s\" is invalid", errf, |
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
509 mcp->resource_pool); |
1712 | 510 break; |
511 | |
0 | 512 default: |
513 #ifndef NDEBUG | |
514 uu_warn("%s:%d: Bad error %d for function %s " | |
515 "in restarter_set_method_context().\n", | |
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
516 __FILE__, __LINE__, rsmc_errno, errf); |
0 | 517 #endif |
518 abort(); | |
519 } | |
520 | |
521 exit(SMF_EXIT_ERR_CONFIG); | |
522 } | |
523 | |
13791
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
524 if (errf != NULL && strcmp(errf, "chdir") == 0) { |
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
525 switch (rsmc_errno) { |
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
526 case EACCES: |
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
527 case EFAULT: |
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
528 case EIO: |
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
529 case ELOOP: |
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
530 case ENAMETOOLONG: |
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
531 case ENOENT: |
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
532 case ENOLINK: |
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
533 case ENOTDIR: |
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
534 log_instance(inst, B_FALSE, "%s: %s (\"%s\")", |
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
535 errf, |
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
536 strerror(rsmc_errno), mcp->working_dir); |
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
537 break; |
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
538 |
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
539 default: |
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
540 #ifndef NDEBUG |
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
541 uu_warn("%s:%d: Bad error %d for function %s " |
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
542 "in restarter_set_method_context().\n", |
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
543 __FILE__, __LINE__, rsmc_errno, errf); |
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
544 #endif |
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
545 abort(); |
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
546 } |
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
547 |
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
548 exit(SMF_EXIT_ERR_CONFIG); |
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
549 } |
40cea5d62fa3
3121 missing SMF method directories should say something useful
Hengqing Hu <hudayou@hotmail.com>
parents:
13310
diff
changeset
|
550 |
0 | 551 if (errf != NULL) { |
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
552 errno = rsmc_errno; |
0 | 553 perror(errf); |
554 | |
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
555 switch (rsmc_errno) { |
0 | 556 case EINVAL: |
557 case EPERM: | |
558 case ENOENT: | |
559 case ENAMETOOLONG: | |
560 case ERANGE: | |
561 case ESRCH: | |
562 exit(SMF_EXIT_ERR_CONFIG); | |
563 /* NOTREACHED */ | |
564 | |
565 default: | |
566 exit(1); | |
567 } | |
568 } | |
569 | |
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
570 switch (rsmc_errno) { |
0 | 571 case ENOMEM: |
9263
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
572 log_instance(inst, B_FALSE, "Out of memory."); |
0 | 573 exit(1); |
574 /* NOTREACHED */ | |
575 | |
576 case ENOENT: | |
9263
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
577 log_instance(inst, B_FALSE, "Missing passwd entry for " |
48d14e1f550f
6411391 Empty method_contexts don't work
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
8944
diff
changeset
|
578 "user."); |
0 | 579 exit(SMF_EXIT_ERR_CONFIG); |
580 /* NOTREACHED */ | |
581 | |
582 default: | |
583 #ifndef NDEBUG | |
584 uu_warn("%s:%d: Bad miscellaneous error %d from " | |
585 "restarter_set_method_context().\n", __FILE__, | |
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
586 __LINE__, rsmc_errno); |
0 | 587 #endif |
588 abort(); | |
589 } | |
590 } | |
591 | |
5040
ff6ebd8761a6
PSARC 2007/177 SMF read-protected property storage
wesolows
parents:
4816
diff
changeset
|
592 nenv = set_smf_env(mcp->env, mcp->env_sz, NULL, inst, |
ff6ebd8761a6
PSARC 2007/177 SMF read-protected property storage
wesolows
parents:
4816
diff
changeset
|
593 method_names[type]); |
0 | 594 |
595 log_preexec(); | |
596 | |
597 (void) execle(SBIN_SH, SBIN_SH, "-c", cmd, NULL, nenv); | |
598 | |
599 exit(10); | |
600 } | |
601 | |
602 static void | |
603 write_status(restarter_inst_t *inst, const char *mname, int stat) | |
604 { | |
605 int r; | |
606 | |
607 again: | |
608 if (inst->ri_mi_deleted) | |
609 return; | |
610 | |
611 r = libscf_write_method_status(inst->ri_m_inst, mname, stat); | |
612 switch (r) { | |
613 case 0: | |
614 break; | |
615 | |
616 case ECONNABORTED: | |
617 libscf_reget_instance(inst); | |
618 goto again; | |
619 | |
620 case ECANCELED: | |
621 inst->ri_mi_deleted = 1; | |
622 break; | |
623 | |
624 case EPERM: | |
625 case EACCES: | |
626 case EROFS: | |
627 log_framework(LOG_INFO, "Could not write exit status " | |
628 "for %s method of %s: %s.\n", mname, | |
629 inst->ri_i.i_fmri, strerror(r)); | |
630 break; | |
631 | |
632 case ENAMETOOLONG: | |
633 default: | |
634 bad_error("libscf_write_method_status", r); | |
635 } | |
636 } | |
637 | |
638 /* | |
639 * int method_run() | |
640 * Execute the type method of instp. If it requires a fork(), wait for it | |
641 * to return and return its exit code in *exit_code. Otherwise set | |
642 * *exit_code to 0 if the method succeeds & -1 if it fails. If the | |
643 * repository connection is broken, it is rebound, but inst may not be | |
644 * reset. | |
645 * Returns | |
646 * 0 - success | |
647 * EINVAL - A correct method or method context couldn't be retrieved. | |
648 * EIO - Contract kill failed. | |
649 * EFAULT - Method couldn't be executed successfully. | |
650 * ELOOP - Retry threshold exceeded. | |
651 * ECANCELED - inst was deleted from the repository before method was run | |
652 * ERANGE - Timeout retry threshold exceeded. | |
653 * EAGAIN - Failed due to external cause, retry. | |
654 */ | |
655 int | |
656 method_run(restarter_inst_t **instp, int type, int *exit_code) | |
657 { | |
658 char *method; | |
659 int ret_status; | |
660 pid_t pid; | |
661 method_restart_t restart_on; | |
662 uint_t cte_mask; | |
663 uint8_t need_session; | |
664 scf_handle_t *h; | |
665 scf_snapshot_t *snap; | |
666 const char *mname; | |
9765
2522fef20c5f
6215238 svc.startd could provide better log messages for faulty method_context contents
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
9263
diff
changeset
|
667 mc_error_t *m_error; |
0 | 668 struct method_context *mcp; |
669 int result = 0, timeout_fired = 0; | |
670 int sig, r; | |
671 boolean_t transient; | |
672 uint64_t timeout; | |
673 uint8_t timeout_retry; | |
674 ctid_t ctid; | |
675 int ctfd = -1; | |
676 restarter_inst_t *inst = *instp; | |
677 int id = inst->ri_id; | |
119
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
678 int forkerr; |
0 | 679 |
11466
d60272412fb0
6915578 MUTEX_HELD() and RW_LOCK_HELD() macros should be available to Posix threads
Roger A. Faulkner <Roger.Faulkner@Sun.COM>
parents:
9765
diff
changeset
|
680 assert(MUTEX_HELD(&inst->ri_lock)); |
0 | 681 assert(instance_in_transition(inst)); |
682 | |
683 if (inst->ri_mi_deleted) | |
684 return (ECANCELED); | |
685 | |
686 *exit_code = 0; | |
687 | |
688 assert(0 <= type && type <= 2); | |
689 mname = method_names[type]; | |
690 | |
691 if (type == METHOD_START) | |
692 inst->ri_pre_online_hook(); | |
693 | |
694 h = scf_instance_handle(inst->ri_m_inst); | |
695 | |
696 snap = scf_snapshot_create(h); | |
697 if (snap == NULL || | |
698 scf_instance_get_snapshot(inst->ri_m_inst, "running", snap) != 0) { | |
699 log_framework(LOG_DEBUG, | |
700 "Could not get running snapshot for %s. " | |
701 "Using editing version to run method %s.\n", | |
702 inst->ri_i.i_fmri, mname); | |
703 scf_snapshot_destroy(snap); | |
704 snap = NULL; | |
705 } | |
706 | |
707 /* | |
708 * After this point, we may be logging to the instance log. | |
709 * Make sure we've noted where that log is as a property of | |
710 * the instance. | |
711 */ | |
712 r = libscf_note_method_log(inst->ri_m_inst, st->st_log_prefix, | |
713 inst->ri_logstem); | |
714 if (r != 0) { | |
715 log_framework(LOG_WARNING, | |
716 "%s: couldn't note log location: %s\n", | |
717 inst->ri_i.i_fmri, strerror(r)); | |
718 } | |
719 | |
720 if ((method = libscf_get_method(h, type, inst, snap, &restart_on, | |
721 &cte_mask, &need_session, &timeout, &timeout_retry)) == NULL) { | |
722 if (errno == LIBSCF_PGROUP_ABSENT) { | |
723 log_framework(LOG_DEBUG, | |
724 "%s: instance has no method property group '%s'.\n", | |
725 inst->ri_i.i_fmri, mname); | |
726 if (type == METHOD_REFRESH) | |
727 log_instance(inst, B_TRUE, "No '%s' method " | |
728 "defined. Treating as :true.", mname); | |
729 else | |
730 log_instance(inst, B_TRUE, "Method property " | |
731 "group '%s' is not present.", mname); | |
732 scf_snapshot_destroy(snap); | |
733 return (0); | |
734 } else if (errno == LIBSCF_PROPERTY_ABSENT) { | |
735 log_framework(LOG_DEBUG, | |
736 "%s: instance has no '%s/exec' method property.\n", | |
737 inst->ri_i.i_fmri, mname); | |
738 log_instance(inst, B_TRUE, "Method property '%s/exec " | |
739 "is not present.", mname); | |
740 scf_snapshot_destroy(snap); | |
741 return (0); | |
742 } else { | |
743 log_error(LOG_WARNING, | |
744 "%s: instance libscf_get_method failed\n", | |
745 inst->ri_i.i_fmri); | |
746 scf_snapshot_destroy(snap); | |
747 return (EINVAL); | |
748 } | |
749 } | |
750 | |
751 /* open service contract if stopping a non-transient service */ | |
752 if (type == METHOD_STOP && (!instance_is_transient_style(inst))) { | |
753 if (inst->ri_i.i_primary_ctid == 0) { | |
754 /* service is not running, nothing to stop */ | |
755 log_framework(LOG_DEBUG, "%s: instance has no primary " | |
756 "contract, no service to stop.\n", | |
757 inst->ri_i.i_fmri); | |
758 scf_snapshot_destroy(snap); | |
759 return (0); | |
760 } | |
761 if ((ctfd = contract_open(inst->ri_i.i_primary_ctid, "process", | |
762 "events", O_RDONLY)) < 0) { | |
763 result = EFAULT; | |
764 log_instance(inst, B_TRUE, "Could not open service " | |
5238
37127716670e
Contributed by Mark Martin <storycrafter@gmail.com>.
lianep
parents:
5040
diff
changeset
|
765 "contract %ld. Stop method not run.", |
0 | 766 inst->ri_i.i_primary_ctid); |
767 goto out; | |
768 } | |
769 } | |
770 | |
771 if (restarter_is_null_method(method)) { | |
772 log_framework(LOG_DEBUG, "%s: null method succeeds\n", | |
773 inst->ri_i.i_fmri); | |
774 | |
5238
37127716670e
Contributed by Mark Martin <storycrafter@gmail.com>.
lianep
parents:
5040
diff
changeset
|
775 log_instance(inst, B_TRUE, "Executing %s method (null).", |
37127716670e
Contributed by Mark Martin <storycrafter@gmail.com>.
lianep
parents:
5040
diff
changeset
|
776 mname); |
0 | 777 |
778 if (type == METHOD_START) | |
779 write_status(inst, mname, 0); | |
780 goto out; | |
781 } | |
782 | |
783 sig = restarter_is_kill_method(method); | |
784 if (sig >= 0) { | |
785 | |
786 if (inst->ri_i.i_primary_ctid == 0) { | |
787 log_error(LOG_ERR, "%s: :kill with no contract\n", | |
788 inst->ri_i.i_fmri); | |
5238
37127716670e
Contributed by Mark Martin <storycrafter@gmail.com>.
lianep
parents:
5040
diff
changeset
|
789 log_instance(inst, B_TRUE, "Invalid use of \":kill\" " |
37127716670e
Contributed by Mark Martin <storycrafter@gmail.com>.
lianep
parents:
5040
diff
changeset
|
790 "as stop method for transient service."); |
0 | 791 result = EINVAL; |
792 goto out; | |
793 } | |
794 | |
795 log_framework(LOG_DEBUG, | |
796 "%s: :killing contract with signal %d\n", | |
797 inst->ri_i.i_fmri, sig); | |
798 | |
5238
37127716670e
Contributed by Mark Martin <storycrafter@gmail.com>.
lianep
parents:
5040
diff
changeset
|
799 log_instance(inst, B_TRUE, "Executing %s method (:kill).", |
0 | 800 mname); |
801 | |
802 if (contract_kill(inst->ri_i.i_primary_ctid, sig, | |
803 inst->ri_i.i_fmri) != 0) { | |
804 result = EIO; | |
805 goto out; | |
806 } else | |
807 goto assured_kill; | |
808 } | |
809 | |
810 log_framework(LOG_DEBUG, "%s: forking to run method %s\n", | |
811 inst->ri_i.i_fmri, method); | |
812 | |
9765
2522fef20c5f
6215238 svc.startd could provide better log messages for faulty method_context contents
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
9263
diff
changeset
|
813 m_error = restarter_get_method_context(RESTARTER_METHOD_CONTEXT_VERSION, |
0 | 814 inst->ri_m_inst, snap, mname, method, &mcp); |
815 | |
9765
2522fef20c5f
6215238 svc.startd could provide better log messages for faulty method_context contents
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
9263
diff
changeset
|
816 if (m_error != NULL) { |
2522fef20c5f
6215238 svc.startd could provide better log messages for faulty method_context contents
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
9263
diff
changeset
|
817 log_instance(inst, B_TRUE, "%s", m_error->msg); |
2522fef20c5f
6215238 svc.startd could provide better log messages for faulty method_context contents
Sean Wilcox <Sean.Wilcox@Sun.COM>
parents:
9263
diff
changeset
|
818 restarter_mc_error_destroy(m_error); |
0 | 819 result = EINVAL; |
820 goto out; | |
821 } | |
822 | |
823 r = method_ready_contract(inst, type, restart_on, cte_mask); | |
824 if (r != 0) { | |
825 assert(r == ECANCELED); | |
826 assert(inst->ri_mi_deleted); | |
827 restarter_free_method_context(mcp); | |
828 result = ECANCELED; | |
829 goto out; | |
830 } | |
831 | |
832 /* | |
833 * Validate safety of method contexts, to save children work. | |
834 */ | |
835 if (!restarter_rm_libs_loadable()) | |
836 log_framework(LOG_DEBUG, "%s: method contexts limited " | |
837 "to root-accessible libraries\n", inst->ri_i.i_fmri); | |
838 | |
839 /* | |
14130
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
840 * For wait-style svc, sanity check that method exists to prevent an |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
841 * infinite loop. |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
842 */ |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
843 if (instance_is_wait_style(inst) && type == METHOD_START) { |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
844 char *pend; |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
845 struct stat64 sbuf; |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
846 |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
847 /* |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
848 * We need to handle start method strings that have arguments, |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
849 * such as '/lib/svc/method/console-login %i'. |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
850 */ |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
851 if ((pend = strchr(method, ' ')) != NULL) |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
852 *pend = '\0'; |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
853 |
14183
68927c785889
4099 SMF methods without absolute paths no longer work
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
14130
diff
changeset
|
854 if (*method == '/' && stat64(method, &sbuf) == -1 && |
68927c785889
4099 SMF methods without absolute paths no longer work
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
14130
diff
changeset
|
855 errno == ENOENT) { |
14130
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
856 log_instance(inst, B_TRUE, "Missing start method (%s), " |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
857 "changing state to maintenance.", method); |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
858 restarter_free_method_context(mcp); |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
859 result = ENOENT; |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
860 goto out; |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
861 } |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
862 if (pend != NULL) |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
863 *pend = ' '; |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
864 } |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
865 |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
866 /* |
0 | 867 * If the service is restarting too quickly, send it to |
868 * maintenance. | |
869 */ | |
870 if (type == METHOD_START) { | |
871 method_record_start(inst); | |
14130
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
872 if (method_rate_critical(inst) && |
3159d5f5b16a
3989 svc.startd gets stuck in a loop when HOME dir doesn't exist
Jerry Jelinek <jerry.jelinek@joyent.com>
parents:
13791
diff
changeset
|
873 !instance_is_wait_style(inst)) { |
0 | 874 log_instance(inst, B_TRUE, "Restarting too quickly, " |
5238
37127716670e
Contributed by Mark Martin <storycrafter@gmail.com>.
lianep
parents:
5040
diff
changeset
|
875 "changing state to maintenance."); |
0 | 876 result = ELOOP; |
3179
80729b9ca1d6
5079387 _get_auth_policy() doesn't provide corresponding free function
jeanm
parents:
1712
diff
changeset
|
877 restarter_free_method_context(mcp); |
0 | 878 goto out; |
879 } | |
880 } | |
881 | |
4244
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
882 atomic_add_16(&storing_contract, 1); |
119
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
883 pid = startd_fork1(&forkerr); |
0 | 884 if (pid == 0) |
885 exec_method(inst, type, method, mcp, need_session); | |
886 | |
887 if (pid == -1) { | |
4244
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
888 atomic_add_16(&storing_contract, -1); |
119
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
889 if (forkerr == EAGAIN) |
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
890 result = EAGAIN; |
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
891 else |
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
892 result = EFAULT; |
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
893 |
0 | 894 log_error(LOG_WARNING, |
119
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
895 "%s: Couldn't fork to execute method %s: %s\n", |
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
896 inst->ri_i.i_fmri, method, strerror(forkerr)); |
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
897 |
4244
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
898 restarter_free_method_context(mcp); |
0 | 899 goto out; |
900 } | |
901 | |
902 | |
903 /* | |
904 * Get the contract id, decide whether it is primary or transient, and | |
905 * stash it in inst & the repository. | |
906 */ | |
907 method_store_contract(inst, type, &ctid); | |
4244
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
908 atomic_add_16(&storing_contract, -1); |
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
909 |
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
910 restarter_free_method_context(mcp); |
0 | 911 |
912 /* | |
913 * Similarly for the start method PID. | |
914 */ | |
915 if (type == METHOD_START && !inst->ri_mi_deleted) | |
916 (void) libscf_write_start_pid(inst->ri_m_inst, pid); | |
917 | |
918 if (instance_is_wait_style(inst) && type == METHOD_START) { | |
919 /* Wait style instances don't get timeouts on start methods. */ | |
920 if (wait_register(pid, inst->ri_i.i_fmri, 1, 0)) { | |
921 log_error(LOG_WARNING, | |
922 "%s: couldn't register %ld for wait\n", | |
923 inst->ri_i.i_fmri, pid); | |
924 result = EFAULT; | |
925 goto contract_out; | |
926 } | |
927 write_status(inst, mname, 0); | |
928 | |
929 } else { | |
930 int r, err; | |
931 time_t start_time; | |
932 time_t end_time; | |
933 | |
934 /* | |
935 * Because on upgrade/live-upgrade we may have no chance | |
936 * to override faulty timeout values on the way to | |
937 * manifest import, all services on the path to manifest | |
938 * import are treated the same as INFINITE timeout services. | |
939 */ | |
940 | |
941 start_time = time(NULL); | |
942 if (timeout != METHOD_TIMEOUT_INFINITE && !is_timeout_ovr(inst)) | |
943 timeout_insert(inst, ctid, timeout); | |
944 else | |
945 timeout = METHOD_TIMEOUT_INFINITE; | |
946 | |
947 /* Unlock the instance while waiting for the method. */ | |
948 MUTEX_UNLOCK(&inst->ri_lock); | |
949 | |
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
950 do { |
0 | 951 r = waitpid(pid, &ret_status, NULL); |
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
952 } while (r == -1 && errno == EINTR); |
0 | 953 if (r == -1) |
954 err = errno; | |
955 | |
956 /* Re-grab the lock. */ | |
957 inst = inst_lookup_by_id(id); | |
958 | |
959 /* | |
960 * inst can't be removed, as the removal thread waits | |
961 * for completion of this one. | |
962 */ | |
963 assert(inst != NULL); | |
964 *instp = inst; | |
965 | |
966 if (inst->ri_timeout != NULL && inst->ri_timeout->te_fired) | |
967 timeout_fired = 1; | |
968 | |
969 timeout_remove(inst, ctid); | |
970 | |
971 log_framework(LOG_DEBUG, | |
972 "%s method for %s exited with status %d.\n", mname, | |
973 inst->ri_i.i_fmri, WEXITSTATUS(ret_status)); | |
974 | |
975 if (r == -1) { | |
976 log_error(LOG_WARNING, | |
977 "Couldn't waitpid() for %s method of %s (%s).\n", | |
978 mname, inst->ri_i.i_fmri, strerror(err)); | |
979 result = EFAULT; | |
980 goto contract_out; | |
981 } | |
982 | |
983 if (type == METHOD_START) | |
984 write_status(inst, mname, ret_status); | |
985 | |
986 /* return ERANGE if this service doesn't retry on timeout */ | |
987 if (timeout_fired == 1 && timeout_retry == 0) { | |
988 result = ERANGE; | |
989 goto contract_out; | |
990 } | |
991 | |
992 if (!WIFEXITED(ret_status)) { | |
993 /* | |
994 * If method didn't exit itself (it was killed by an | |
995 * external entity, etc.), consider the entire | |
996 * method_run as failed. | |
997 */ | |
998 if (WIFSIGNALED(ret_status)) { | |
999 char buf[SIG2STR_MAX]; | |
1000 (void) sig2str(WTERMSIG(ret_status), buf); | |
1001 | |
1002 log_error(LOG_WARNING, "%s: Method \"%s\" " | |
1003 "failed due to signal %s.\n", | |
1004 inst->ri_i.i_fmri, method, buf); | |
1005 log_instance(inst, B_TRUE, "Method \"%s\" " | |
5238
37127716670e
Contributed by Mark Martin <storycrafter@gmail.com>.
lianep
parents:
5040
diff
changeset
|
1006 "failed due to signal %s.", mname, buf); |
0 | 1007 } else { |
1008 log_error(LOG_WARNING, "%s: Method \"%s\" " | |
1009 "failed with exit status %d.\n", | |
1010 inst->ri_i.i_fmri, method, | |
1011 WEXITSTATUS(ret_status)); | |
1012 log_instance(inst, B_TRUE, "Method \"%s\" " | |
5238
37127716670e
Contributed by Mark Martin <storycrafter@gmail.com>.
lianep
parents:
5040
diff
changeset
|
1013 "failed with exit status %d.", mname, |
0 | 1014 WEXITSTATUS(ret_status)); |
1015 } | |
1016 result = EAGAIN; | |
1017 goto contract_out; | |
1018 } | |
1019 | |
1020 *exit_code = WEXITSTATUS(ret_status); | |
1021 if (*exit_code != 0) { | |
1022 log_error(LOG_WARNING, | |
1023 "%s: Method \"%s\" failed with exit status %d.\n", | |
1024 inst->ri_i.i_fmri, method, WEXITSTATUS(ret_status)); | |
1025 } | |
1026 | |
1027 log_instance(inst, B_TRUE, "Method \"%s\" exited with status " | |
5238
37127716670e
Contributed by Mark Martin <storycrafter@gmail.com>.
lianep
parents:
5040
diff
changeset
|
1028 "%d.", mname, *exit_code); |
0 | 1029 |
1030 if (*exit_code != 0) | |
1031 goto contract_out; | |
1032 | |
1033 end_time = time(NULL); | |
1034 | |
1035 /* Give service contract remaining seconds to empty */ | |
1036 if (timeout != METHOD_TIMEOUT_INFINITE) | |
1037 timeout -= (end_time - start_time); | |
1038 } | |
1039 | |
1040 assured_kill: | |
1041 /* | |
1042 * For stop methods, assure that the service contract has emptied | |
1043 * before returning. | |
1044 */ | |
1045 if (type == METHOD_STOP && (!instance_is_transient_style(inst)) && | |
1046 !(contract_is_empty(inst->ri_i.i_primary_ctid))) { | |
8944
e4ff744ddfe1
6805730 some simple changes would make 'init 5' much faster
Dan Price <dp@eng.sun.com>
parents:
6748
diff
changeset
|
1047 int times = 0; |
0 | 1048 |
1049 if (timeout != METHOD_TIMEOUT_INFINITE) | |
1050 timeout_insert(inst, inst->ri_i.i_primary_ctid, | |
1051 timeout); | |
1052 | |
1053 for (;;) { | |
8944
e4ff744ddfe1
6805730 some simple changes would make 'init 5' much faster
Dan Price <dp@eng.sun.com>
parents:
6748
diff
changeset
|
1054 /* |
e4ff744ddfe1
6805730 some simple changes would make 'init 5' much faster
Dan Price <dp@eng.sun.com>
parents:
6748
diff
changeset
|
1055 * Check frequently at first, then back off. This |
e4ff744ddfe1
6805730 some simple changes would make 'init 5' much faster
Dan Price <dp@eng.sun.com>
parents:
6748
diff
changeset
|
1056 * keeps startd from idling while shutting down. |
e4ff744ddfe1
6805730 some simple changes would make 'init 5' much faster
Dan Price <dp@eng.sun.com>
parents:
6748
diff
changeset
|
1057 */ |
e4ff744ddfe1
6805730 some simple changes would make 'init 5' much faster
Dan Price <dp@eng.sun.com>
parents:
6748
diff
changeset
|
1058 if (times < 20) { |
e4ff744ddfe1
6805730 some simple changes would make 'init 5' much faster
Dan Price <dp@eng.sun.com>
parents:
6748
diff
changeset
|
1059 (void) poll(NULL, 0, 5); |
e4ff744ddfe1
6805730 some simple changes would make 'init 5' much faster
Dan Price <dp@eng.sun.com>
parents:
6748
diff
changeset
|
1060 times++; |
e4ff744ddfe1
6805730 some simple changes would make 'init 5' much faster
Dan Price <dp@eng.sun.com>
parents:
6748
diff
changeset
|
1061 } else { |
e4ff744ddfe1
6805730 some simple changes would make 'init 5' much faster
Dan Price <dp@eng.sun.com>
parents:
6748
diff
changeset
|
1062 (void) poll(NULL, 0, 100); |
e4ff744ddfe1
6805730 some simple changes would make 'init 5' much faster
Dan Price <dp@eng.sun.com>
parents:
6748
diff
changeset
|
1063 } |
4244
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
1064 if (contract_is_empty(inst->ri_i.i_primary_ctid)) |
0 | 1065 break; |
1066 } | |
1067 | |
1068 if (timeout != METHOD_TIMEOUT_INFINITE) | |
1069 if (inst->ri_timeout->te_fired) | |
1070 result = EFAULT; | |
1071 | |
1072 timeout_remove(inst, inst->ri_i.i_primary_ctid); | |
1073 } | |
1074 | |
1075 contract_out: | |
1076 /* Abandon contracts for transient methods & methods that fail. */ | |
1077 transient = method_is_transient(inst, type); | |
1078 if ((transient || *exit_code != 0 || result != 0) && | |
1079 (restarter_is_kill_method(method) < 0)) | |
1080 method_remove_contract(inst, !transient, B_TRUE); | |
1081 | |
1082 out: | |
1083 if (ctfd >= 0) | |
1084 (void) close(ctfd); | |
1085 scf_snapshot_destroy(snap); | |
1086 free(method); | |
1087 return (result); | |
1088 } | |
1089 | |
1090 /* | |
1091 * The method thread executes a service method to effect a state transition. | |
1092 * The next_state of info->sf_id should be non-_NONE on entrance, and it will | |
1093 * be _NONE on exit (state will either be what next_state was (on success), or | |
1094 * it will be _MAINT (on error)). | |
1095 * | |
1096 * There are six classes of methods to consider: start & other (stop, refresh) | |
1097 * for each of "normal" services, wait services, and transient services. For | |
1098 * each, the method must be fetched from the repository & executed. fork()ed | |
1099 * methods must be waited on, except for the start method of wait services | |
1100 * (which must be registered with the wait subsystem via wait_register()). If | |
1101 * the method succeeded (returned 0), then for start methods its contract | |
1102 * should be recorded as the primary contract for the service. For other | |
1103 * methods, it should be abandoned. If the method fails, then depending on | |
1104 * the failure, either the method should be reexecuted or the service should | |
1105 * be put into maintenance. Either way the contract should be abandoned. | |
1106 */ | |
1107 void * | |
1108 method_thread(void *arg) | |
1109 { | |
1110 fork_info_t *info = arg; | |
1111 restarter_inst_t *inst; | |
1112 scf_handle_t *local_handle; | |
1113 scf_instance_t *s_inst = NULL; | |
1114 int r, exit_code; | |
1115 boolean_t retryable; | |
12979
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11466
diff
changeset
|
1116 restarter_str_t reason; |
0 | 1117 |
1118 assert(0 <= info->sf_method_type && info->sf_method_type <= 2); | |
1119 | |
1120 /* Get (and lock) the restarter_inst_t. */ | |
1121 inst = inst_lookup_by_id(info->sf_id); | |
1122 | |
1123 assert(inst->ri_method_thread != 0); | |
1124 assert(instance_in_transition(inst) == 1); | |
1125 | |
1126 /* | |
1127 * We cannot leave this function with inst in transition, because | |
1128 * protocol.c withholds messages for inst otherwise. | |
1129 */ | |
1130 | |
1131 log_framework(LOG_DEBUG, "method_thread() running %s method for %s.\n", | |
1132 method_names[info->sf_method_type], inst->ri_i.i_fmri); | |
1133 | |
1134 local_handle = libscf_handle_create_bound_loop(); | |
1135 | |
1136 rebind_retry: | |
1137 /* get scf_instance_t */ | |
1138 switch (r = libscf_fmri_get_instance(local_handle, inst->ri_i.i_fmri, | |
1139 &s_inst)) { | |
1140 case 0: | |
1141 break; | |
1142 | |
1143 case ECONNABORTED: | |
1144 libscf_handle_rebind(local_handle); | |
1145 goto rebind_retry; | |
1146 | |
1147 case ENOENT: | |
1148 /* | |
1149 * It's not there, but we need to call this so protocol.c | |
1150 * doesn't think it's in transition anymore. | |
1151 */ | |
1152 (void) restarter_instance_update_states(local_handle, inst, | |
1153 inst->ri_i.i_state, RESTARTER_STATE_NONE, RERR_NONE, | |
12979
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11466
diff
changeset
|
1154 restarter_str_none); |
0 | 1155 goto out; |
1156 | |
1157 case EINVAL: | |
1158 case ENOTSUP: | |
1159 default: | |
1160 bad_error("libscf_fmri_get_instance", r); | |
1161 } | |
1162 | |
1163 inst->ri_m_inst = s_inst; | |
1164 inst->ri_mi_deleted = B_FALSE; | |
1165 | |
1166 retry: | |
1167 if (info->sf_method_type == METHOD_START) | |
1168 log_transition(inst, START_REQUESTED); | |
1169 | |
1170 r = method_run(&inst, info->sf_method_type, &exit_code); | |
1171 | |
1172 if (r == 0 && exit_code == 0) { | |
1173 /* Success! */ | |
1174 assert(inst->ri_i.i_next_state != RESTARTER_STATE_NONE); | |
1175 | |
1176 /* | |
1177 * When a stop method succeeds, remove the primary contract of | |
1178 * the service, unless we're going to offline, in which case | |
1179 * retain the contract so we can transfer inherited contracts to | |
1180 * the replacement service. | |
1181 */ | |
1182 | |
1183 if (info->sf_method_type == METHOD_STOP && | |
1184 inst->ri_i.i_primary_ctid != 0) { | |
1185 if (inst->ri_i.i_next_state == RESTARTER_STATE_OFFLINE) | |
1186 inst->ri_i.i_primary_ctid_stopped = 1; | |
1187 else | |
1188 method_remove_contract(inst, B_TRUE, B_TRUE); | |
1189 } | |
1190 /* | |
1191 * We don't care whether the handle was rebound because this is | |
1192 * the last thing we do with it. | |
1193 */ | |
1194 (void) restarter_instance_update_states(local_handle, inst, | |
1195 inst->ri_i.i_next_state, RESTARTER_STATE_NONE, | |
12979
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11466
diff
changeset
|
1196 info->sf_event_type, info->sf_reason); |
0 | 1197 |
1198 (void) update_fault_count(inst, FAULT_COUNT_RESET); | |
1199 | |
1200 goto out; | |
1201 } | |
1202 | |
1203 /* Failure. Retry or go to maintenance. */ | |
1204 | |
1205 if (r != 0 && r != EAGAIN) { | |
1206 retryable = B_FALSE; | |
1207 } else { | |
1208 switch (exit_code) { | |
1209 case SMF_EXIT_ERR_CONFIG: | |
1210 case SMF_EXIT_ERR_NOSMF: | |
1211 case SMF_EXIT_ERR_PERM: | |
1212 case SMF_EXIT_ERR_FATAL: | |
1213 retryable = B_FALSE; | |
1214 break; | |
1215 | |
1216 default: | |
1217 retryable = B_TRUE; | |
1218 } | |
1219 } | |
1220 | |
1221 if (retryable && update_fault_count(inst, FAULT_COUNT_INCR) != 1) | |
1222 goto retry; | |
1223 | |
1224 /* maintenance */ | |
1225 if (r == ELOOP) | |
1226 log_transition(inst, START_FAILED_REPEATEDLY); | |
1227 else if (r == ERANGE) | |
1228 log_transition(inst, START_FAILED_TIMEOUT_FATAL); | |
1229 else if (exit_code == SMF_EXIT_ERR_CONFIG) | |
1230 log_transition(inst, START_FAILED_CONFIGURATION); | |
1231 else if (exit_code == SMF_EXIT_ERR_FATAL) | |
1232 log_transition(inst, START_FAILED_FATAL); | |
1233 else | |
1234 log_transition(inst, START_FAILED_OTHER); | |
1235 | |
12979
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11466
diff
changeset
|
1236 if (r == ELOOP) { |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11466
diff
changeset
|
1237 reason = restarter_str_restarting_too_quickly; |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11466
diff
changeset
|
1238 } else if (retryable) { |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11466
diff
changeset
|
1239 reason = restarter_str_fault_threshold_reached; |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11466
diff
changeset
|
1240 } else { |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11466
diff
changeset
|
1241 reason = restarter_str_method_failed; |
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11466
diff
changeset
|
1242 } |
0 | 1243 |
1244 (void) restarter_instance_update_states(local_handle, inst, | |
1245 RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_FAULT, | |
12979
ab9ae749152f
PSARC/2009/617 Software Events Notification Parameters CLI
Gavin Maltby <gavin.maltby@oracle.com>
parents:
11466
diff
changeset
|
1246 reason); |
0 | 1247 |
1248 if (!method_is_transient(inst, info->sf_method_type) && | |
1249 inst->ri_i.i_primary_ctid != 0) | |
1250 method_remove_contract(inst, B_TRUE, B_TRUE); | |
1251 | |
1252 out: | |
1253 inst->ri_method_thread = 0; | |
6748
6c7adefd809e
6705022 *svc.startd* coredumps in restarter_insert_inst()
rm88369
parents:
6073
diff
changeset
|
1254 |
6c7adefd809e
6705022 *svc.startd* coredumps in restarter_insert_inst()
rm88369
parents:
6073
diff
changeset
|
1255 /* |
6c7adefd809e
6705022 *svc.startd* coredumps in restarter_insert_inst()
rm88369
parents:
6073
diff
changeset
|
1256 * Unlock the mutex after broadcasting to avoid a race condition |
6c7adefd809e
6705022 *svc.startd* coredumps in restarter_insert_inst()
rm88369
parents:
6073
diff
changeset
|
1257 * with restarter_delete_inst() when the 'inst' structure is freed. |
6c7adefd809e
6705022 *svc.startd* coredumps in restarter_insert_inst()
rm88369
parents:
6073
diff
changeset
|
1258 */ |
6c7adefd809e
6705022 *svc.startd* coredumps in restarter_insert_inst()
rm88369
parents:
6073
diff
changeset
|
1259 (void) pthread_cond_broadcast(&inst->ri_method_cv); |
0 | 1260 MUTEX_UNLOCK(&inst->ri_lock); |
1261 | |
1262 scf_instance_destroy(s_inst); | |
1263 scf_handle_destroy(local_handle); | |
1264 startd_free(info, sizeof (fork_info_t)); | |
1265 return (NULL); | |
1266 } |