Mercurial > illumos > fmac
annotate usr/src/uts/common/syscall/lgrpsys.c @ 7887:f9ded24b041a
[fmac-discuss] [PATCH] Fix more cstyle issues
Fix more cstyle issues introduced by prior patches, in particular the
hasprocperm patch and the secctx patch.
uts/common/syscall/lgrpsys.c is still not clean with regard to
continuation indentation but the remaining warnings were not introduced
by our patches.
author | Stephen Smalley <sds@tycho.nsa.gov> |
---|---|
date | Fri, 17 Oct 2008 13:28:50 -0400 |
parents | e3677dd00778 |
children |
rev | line source |
---|---|
0 | 1 /* |
2 * CDDL HEADER START | |
3 * | |
4 * The contents of this file are subject to the terms of the | |
2685 | 5 * Common Development and Distribution License (the "License"). |
6 * You may not use this file except in compliance with the License. | |
0 | 7 * |
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 * or http://www.opensolaris.org/os/licensing. | |
10 * See the License for the specific language governing permissions | |
11 * and limitations under the License. | |
12 * | |
13 * When distributing Covered Code, include this CDDL HEADER in each | |
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 * If applicable, add the following below this CDDL HEADER, with the | |
16 * fields enclosed by brackets "[]" replaced with your own identifying | |
17 * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 * | |
19 * CDDL HEADER END | |
20 */ | |
2685 | 21 |
0 | 22 /* |
2685 | 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. |
0 | 24 * Use is subject to license terms. |
25 */ | |
26 | |
27 #pragma ident "%Z%%M% %I% %E% SMI" | |
28 | |
29 /* | |
30 * lgroup system calls | |
31 */ | |
32 | |
33 #include <sys/types.h> | |
34 #include <sys/errno.h> | |
35 #include <sys/sunddi.h> | |
36 #include <sys/systm.h> | |
37 #include <sys/mman.h> | |
38 #include <sys/cpupart.h> | |
39 #include <sys/lgrp.h> | |
40 #include <sys/lgrp_user.h> | |
7873
e3677dd00778
Mediate some process operations
Stephen Smalley <sds@tycho.nsa.gov>
parents:
2988
diff
changeset
|
41 #include <sys/fmac/av_permissions.h> |
0 | 42 #include <sys/promif.h> /* for prom_printf() */ |
43 #include <sys/sysmacros.h> | |
44 | |
45 #include <vm/as.h> | |
46 | |
47 | |
48 /* definitions for mi_validity */ | |
49 #define VALID_ADDR 1 | |
50 #define VALID_REQ 2 | |
51 | |
52 /* | |
53 * run through the given number of addresses and requests and return the | |
54 * corresponding memory information for each address | |
55 */ | |
56 static int | |
57 meminfo(int addr_count, struct meminfo *mip) | |
58 { | |
59 size_t in_size, out_size, req_size, val_size; | |
60 struct as *as; | |
61 struct hat *hat; | |
62 int i, j, out_idx, info_count; | |
63 lgrp_t *lgrp; | |
64 pfn_t pfn; | |
65 ssize_t pgsz; | |
66 int *req_array, *val_array; | |
67 uint64_t *in_array, *out_array; | |
68 uint64_t addr, paddr; | |
69 uintptr_t vaddr; | |
70 int ret = 0; | |
71 struct meminfo minfo; | |
72 #if defined(_SYSCALL32_IMPL) | |
73 struct meminfo32 minfo32; | |
74 #endif | |
75 | |
76 /* | |
77 * Make sure that there is at least one address to translate and | |
78 * limit how many virtual addresses the kernel can do per call | |
79 */ | |
80 if (addr_count < 1) | |
81 return (set_errno(EINVAL)); | |
82 else if (addr_count > MAX_MEMINFO_CNT) | |
83 addr_count = MAX_MEMINFO_CNT; | |
84 | |
85 if (get_udatamodel() == DATAMODEL_NATIVE) { | |
86 if (copyin(mip, &minfo, sizeof (struct meminfo))) | |
87 return (set_errno(EFAULT)); | |
88 } | |
89 #if defined(_SYSCALL32_IMPL) | |
90 else { | |
91 bzero(&minfo, sizeof (minfo)); | |
92 if (copyin(mip, &minfo32, sizeof (struct meminfo32))) | |
93 return (set_errno(EFAULT)); | |
94 minfo.mi_inaddr = (const uint64_t *)(uintptr_t) | |
95 minfo32.mi_inaddr; | |
96 minfo.mi_info_req = (const uint_t *)(uintptr_t) | |
97 minfo32.mi_info_req; | |
98 minfo.mi_info_count = minfo32.mi_info_count; | |
99 minfo.mi_outdata = (uint64_t *)(uintptr_t) | |
100 minfo32.mi_outdata; | |
101 minfo.mi_validity = (uint_t *)(uintptr_t) | |
102 minfo32.mi_validity; | |
103 } | |
104 #endif | |
105 /* | |
106 * all the input parameters have been copied in:- | |
107 * addr_count - number of input addresses | |
108 * minfo.mi_inaddr - array of input addresses | |
109 * minfo.mi_info_req - array of types of information requested | |
110 * minfo.mi_info_count - no. of pieces of info requested for each addr | |
111 * minfo.mi_outdata - array into which the results are placed | |
112 * minfo.mi_validity - array containing bitwise result codes; 0th bit | |
113 * evaluates validity of corresponding input | |
114 * address, 1st bit validity of response to first | |
115 * member of info_req, etc. | |
116 */ | |
117 | |
118 /* make sure mi_info_count is within limit */ | |
119 info_count = minfo.mi_info_count; | |
120 if (info_count < 1 || info_count > MAX_MEMINFO_REQ) | |
121 return (set_errno(EINVAL)); | |
122 | |
123 /* | |
124 * allocate buffer in_array for the input addresses and copy them in | |
125 */ | |
126 in_size = sizeof (uint64_t) * addr_count; | |
127 in_array = kmem_alloc(in_size, KM_SLEEP); | |
128 if (copyin(minfo.mi_inaddr, in_array, in_size)) { | |
129 kmem_free(in_array, in_size); | |
130 return (set_errno(EFAULT)); | |
131 } | |
132 | |
133 /* | |
134 * allocate buffer req_array for the input info_reqs and copy them in | |
135 */ | |
136 req_size = sizeof (uint_t) * info_count; | |
137 req_array = kmem_alloc(req_size, KM_SLEEP); | |
138 if (copyin(minfo.mi_info_req, req_array, req_size)) { | |
139 kmem_free(req_array, req_size); | |
140 kmem_free(in_array, in_size); | |
141 return (set_errno(EFAULT)); | |
142 } | |
143 | |
144 /* | |
145 * allocate buffer out_array which holds the results and will have | |
146 * to be copied out later | |
147 */ | |
148 out_size = sizeof (uint64_t) * addr_count * info_count; | |
149 out_array = kmem_alloc(out_size, KM_SLEEP); | |
150 | |
151 /* | |
152 * allocate buffer val_array which holds the validity bits and will | |
153 * have to be copied out later | |
154 */ | |
155 val_size = sizeof (uint_t) * addr_count; | |
156 val_array = kmem_alloc(val_size, KM_SLEEP); | |
157 | |
158 if ((req_array[0] & MEMINFO_MASK) == MEMINFO_PLGRP) { | |
159 /* find the corresponding lgroup for each physical address */ | |
160 for (i = 0; i < addr_count; i++) { | |
161 paddr = in_array[i]; | |
162 pfn = btop(paddr); | |
163 lgrp = lgrp_pfn_to_lgrp(pfn); | |
164 if (lgrp) { | |
165 out_array[i] = lgrp->lgrp_id; | |
166 val_array[i] = VALID_ADDR | VALID_REQ; | |
167 } else { | |
168 out_array[i] = NULL; | |
169 val_array[i] = 0; | |
170 } | |
171 } | |
172 } else { | |
173 /* get the corresponding memory info for each virtual address */ | |
174 as = curproc->p_as; | |
175 | |
176 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); | |
177 hat = as->a_hat; | |
178 for (i = out_idx = 0; i < addr_count; i++, out_idx += | |
179 info_count) { | |
180 addr = in_array[i]; | |
181 vaddr = (uintptr_t)(addr & ~PAGEOFFSET); | |
182 if (!as_segat(as, (caddr_t)vaddr)) { | |
183 val_array[i] = 0; | |
184 continue; | |
185 } | |
186 val_array[i] = VALID_ADDR; | |
187 pfn = hat_getpfnum(hat, (caddr_t)vaddr); | |
188 if (pfn != PFN_INVALID) { | |
189 paddr = (uint64_t)((pfn << PAGESHIFT) | | |
190 (addr & PAGEOFFSET)); | |
191 for (j = 0; j < info_count; j++) { | |
192 switch (req_array[j] & MEMINFO_MASK) { | |
193 case MEMINFO_VPHYSICAL: | |
194 /* | |
195 * return the physical address | |
196 * corresponding to the input | |
197 * virtual address | |
198 */ | |
199 out_array[out_idx + j] = paddr; | |
200 val_array[i] |= VALID_REQ << j; | |
201 break; | |
202 case MEMINFO_VLGRP: | |
203 /* | |
204 * return the lgroup of physical | |
205 * page corresponding to the | |
206 * input virtual address | |
207 */ | |
208 lgrp = lgrp_pfn_to_lgrp(pfn); | |
209 if (lgrp) { | |
210 out_array[out_idx + j] = | |
211 lgrp->lgrp_id; | |
212 val_array[i] |= | |
213 VALID_REQ << j; | |
214 } | |
215 break; | |
216 case MEMINFO_VPAGESIZE: | |
217 /* | |
218 * return the size of physical | |
219 * page corresponding to the | |
220 * input virtual address | |
221 */ | |
222 pgsz = hat_getpagesize(hat, | |
223 (caddr_t)vaddr); | |
224 if (pgsz != -1) { | |
225 out_array[out_idx + j] = | |
226 pgsz; | |
227 val_array[i] |= | |
228 VALID_REQ << j; | |
229 } | |
230 break; | |
231 case MEMINFO_VREPLCNT: | |
232 /* | |
233 * for future use:- | |
234 * return the no. replicated | |
235 * physical pages corresponding | |
236 * to the input virtual address, | |
237 * so it is always 0 at the | |
238 * moment | |
239 */ | |
240 out_array[out_idx + j] = 0; | |
241 val_array[i] |= VALID_REQ << j; | |
242 break; | |
243 case MEMINFO_VREPL: | |
244 /* | |
245 * for future use:- | |
246 * return the nth physical | |
247 * replica of the specified | |
248 * virtual address | |
249 */ | |
250 break; | |
251 case MEMINFO_VREPL_LGRP: | |
252 /* | |
253 * for future use:- | |
254 * return the lgroup of nth | |
255 * physical replica of the | |
256 * specified virtual address | |
257 */ | |
258 break; | |
259 case MEMINFO_PLGRP: | |
260 /* | |
261 * this is for physical address | |
262 * only, shouldn't mix with | |
263 * virtual address | |
264 */ | |
265 break; | |
266 default: | |
267 break; | |
268 } | |
269 } | |
270 } | |
271 } | |
272 AS_LOCK_EXIT(as, &as->a_lock); | |
273 } | |
274 | |
275 /* copy out the results and validity bits and free the buffers */ | |
276 if ((copyout(out_array, minfo.mi_outdata, out_size) != 0) || | |
277 (copyout(val_array, minfo.mi_validity, val_size) != 0)) | |
278 ret = set_errno(EFAULT); | |
279 | |
280 kmem_free(in_array, in_size); | |
281 kmem_free(out_array, out_size); | |
282 kmem_free(req_array, req_size); | |
283 kmem_free(val_array, val_size); | |
284 | |
285 return (ret); | |
286 } | |
287 | |
288 | |
289 /* | |
290 * Initialize lgroup affinities for thread | |
291 */ | |
292 void | |
293 lgrp_affinity_init(lgrp_affinity_t **bufaddr) | |
294 { | |
295 if (bufaddr) | |
296 *bufaddr = NULL; | |
297 } | |
298 | |
299 | |
300 /* | |
301 * Free lgroup affinities for thread and set to NULL | |
302 * just in case thread gets recycled | |
303 */ | |
304 void | |
305 lgrp_affinity_free(lgrp_affinity_t **bufaddr) | |
306 { | |
307 if (bufaddr && *bufaddr) { | |
308 kmem_free(*bufaddr, nlgrpsmax * sizeof (lgrp_affinity_t)); | |
309 *bufaddr = NULL; | |
310 } | |
311 } | |
312 | |
313 | |
314 #define P_ANY -2 /* cookie specifying any ID */ | |
315 | |
316 | |
317 /* | |
318 * Find LWP with given ID in specified process and get its affinity for | |
319 * specified lgroup | |
320 */ | |
321 lgrp_affinity_t | |
322 lgrp_affinity_get_thread(proc_t *p, id_t lwpid, lgrp_id_t lgrp) | |
323 { | |
324 lgrp_affinity_t aff; | |
325 int found; | |
326 kthread_t *t; | |
327 | |
328 ASSERT(MUTEX_HELD(&p->p_lock)); | |
329 | |
330 aff = LGRP_AFF_NONE; | |
331 found = 0; | |
332 t = p->p_tlist; | |
333 /* | |
334 * The process may be executing in proc_exit() and its p->p_list may be | |
335 * already NULL. | |
336 */ | |
337 if (t == NULL) | |
338 return (set_errno(ESRCH)); | |
339 | |
340 do { | |
341 if (t->t_tid == lwpid || lwpid == P_ANY) { | |
342 thread_lock(t); | |
343 /* | |
344 * Check to see whether caller has permission to set | |
345 * affinity for LWP | |
346 */ | |
7873
e3677dd00778
Mediate some process operations
Stephen Smalley <sds@tycho.nsa.gov>
parents:
2988
diff
changeset
|
347 if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED(), |
7887
f9ded24b041a
[fmac-discuss] [PATCH] Fix more cstyle issues
Stephen Smalley <sds@tycho.nsa.gov>
parents:
7873
diff
changeset
|
348 PROCESS__SETSCHED)) { |
0 | 349 thread_unlock(t); |
350 return (set_errno(EPERM)); | |
351 } | |
352 | |
353 if (t->t_lgrp_affinity) | |
354 aff = t->t_lgrp_affinity[lgrp]; | |
355 thread_unlock(t); | |
356 found = 1; | |
357 break; | |
358 } | |
359 } while ((t = t->t_forw) != p->p_tlist); | |
360 if (!found) | |
361 aff = set_errno(ESRCH); | |
362 | |
363 return (aff); | |
364 } | |
365 | |
366 | |
367 /* | |
368 * Get lgroup affinity for given LWP | |
369 */ | |
370 lgrp_affinity_t | |
371 lgrp_affinity_get(lgrp_affinity_args_t *ap) | |
372 { | |
373 lgrp_affinity_t aff; | |
374 lgrp_affinity_args_t args; | |
375 id_t id; | |
376 idtype_t idtype; | |
377 lgrp_id_t lgrp; | |
378 proc_t *p; | |
379 kthread_t *t; | |
380 | |
381 /* | |
382 * Copyin arguments | |
383 */ | |
384 if (copyin(ap, &args, sizeof (lgrp_affinity_args_t)) != 0) | |
385 return (set_errno(EFAULT)); | |
386 | |
387 id = args.id; | |
388 idtype = args.idtype; | |
389 lgrp = args.lgrp; | |
390 | |
391 /* | |
392 * Check for invalid lgroup | |
393 */ | |
394 if (lgrp < 0 || lgrp == LGRP_NONE) | |
395 return (set_errno(EINVAL)); | |
396 | |
397 /* | |
398 * Check for existing lgroup | |
399 */ | |
400 if (lgrp > lgrp_alloc_max) | |
401 return (set_errno(ESRCH)); | |
402 | |
403 /* | |
404 * Get lgroup affinity for given LWP or process | |
405 */ | |
406 switch (idtype) { | |
407 | |
408 case P_LWPID: | |
409 /* | |
410 * LWP in current process | |
411 */ | |
412 p = curproc; | |
413 mutex_enter(&p->p_lock); | |
414 if (id != P_MYID) /* different thread */ | |
415 aff = lgrp_affinity_get_thread(p, id, lgrp); | |
416 else { /* current thread */ | |
417 aff = LGRP_AFF_NONE; | |
418 t = curthread; | |
419 thread_lock(t); | |
420 if (t->t_lgrp_affinity) | |
421 aff = t->t_lgrp_affinity[lgrp]; | |
422 thread_unlock(t); | |
423 } | |
424 mutex_exit(&p->p_lock); | |
425 break; | |
426 | |
427 case P_PID: | |
428 /* | |
429 * Process | |
430 */ | |
431 mutex_enter(&pidlock); | |
432 | |
433 if (id == P_MYID) | |
434 p = curproc; | |
435 else { | |
436 p = prfind(id); | |
437 if (p == NULL) { | |
438 mutex_exit(&pidlock); | |
439 return (set_errno(ESRCH)); | |
440 } | |
441 } | |
442 | |
443 mutex_enter(&p->p_lock); | |
444 aff = lgrp_affinity_get_thread(p, P_ANY, lgrp); | |
445 mutex_exit(&p->p_lock); | |
446 | |
447 mutex_exit(&pidlock); | |
448 break; | |
449 | |
450 default: | |
451 aff = set_errno(EINVAL); | |
452 break; | |
453 } | |
454 | |
455 return (aff); | |
456 } | |
457 | |
458 | |
459 /* | |
460 * Find lgroup for which this thread has most affinity in specified partition | |
2988
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
461 * starting from home lgroup unless specified starting lgroup is preferred |
0 | 462 */ |
463 lpl_t * | |
2988
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
464 lgrp_affinity_best(kthread_t *t, struct cpupart *cpupart, lgrp_id_t start, |
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
465 boolean_t prefer_start) |
0 | 466 { |
467 lgrp_affinity_t *affs; | |
468 lgrp_affinity_t best_aff; | |
469 lpl_t *best_lpl; | |
2988
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
470 lgrp_id_t finish; |
0 | 471 lgrp_id_t home; |
472 lgrp_id_t lgrpid; | |
473 lpl_t *lpl; | |
474 | |
475 ASSERT(t != NULL); | |
476 ASSERT((MUTEX_HELD(&cpu_lock) || curthread->t_preempt > 0) || | |
477 (MUTEX_HELD(&ttoproc(t)->p_lock) && THREAD_LOCK_HELD(t))); | |
478 ASSERT(cpupart != NULL); | |
479 | |
480 if (t->t_lgrp_affinity == NULL) | |
481 return (NULL); | |
482 | |
483 affs = t->t_lgrp_affinity; | |
484 | |
485 /* | |
486 * Thread bound to CPU | |
487 */ | |
488 if (t->t_bind_cpu != PBIND_NONE) { | |
489 cpu_t *cp; | |
490 | |
491 /* | |
2988
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
492 * Find which lpl has most affinity among leaf lpl directly |
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
493 * containing CPU and its ancestor lpls |
0 | 494 */ |
495 cp = cpu[t->t_bind_cpu]; | |
2988
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
496 |
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
497 best_lpl = lpl = cp->cpu_lpl; |
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
498 best_aff = affs[best_lpl->lpl_lgrpid]; |
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
499 while (lpl->lpl_parent != NULL) { |
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
500 lpl = lpl->lpl_parent; |
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
501 lgrpid = lpl->lpl_lgrpid; |
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
502 if (affs[lgrpid] > best_aff) { |
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
503 best_lpl = lpl; |
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
504 best_aff = affs[lgrpid]; |
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
505 } |
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
506 } |
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
507 return (best_lpl); |
0 | 508 } |
509 | |
510 /* | |
2988
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
511 * Start searching from home lgroup unless given starting lgroup is |
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
512 * preferred or home lgroup isn't in given pset. Use root lgroup as |
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
513 * starting point if both home and starting lgroups aren't in given |
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
514 * pset. |
0 | 515 */ |
516 ASSERT(start >= 0 && start <= lgrp_alloc_max); | |
2988
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
517 home = t->t_lpl->lpl_lgrpid; |
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
518 if (!prefer_start && LGRP_CPUS_IN_PART(home, cpupart)) |
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
519 lgrpid = home; |
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
520 else if (start != LGRP_NONE && LGRP_CPUS_IN_PART(start, cpupart)) |
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
521 lgrpid = start; |
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
522 else |
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
523 lgrpid = LGRP_ROOTID; |
0 | 524 |
2988
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
525 best_lpl = &cpupart->cp_lgrploads[lgrpid]; |
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
526 best_aff = affs[lgrpid]; |
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
527 finish = lgrpid; |
0 | 528 do { |
529 /* | |
530 * Skip any lgroups that don't have CPU resources | |
531 * in this processor set. | |
532 */ | |
533 if (!LGRP_CPUS_IN_PART(lgrpid, cpupart)) { | |
534 if (++lgrpid > lgrp_alloc_max) | |
535 lgrpid = 0; /* wrap the search */ | |
536 continue; | |
537 } | |
538 | |
539 /* | |
540 * Find lgroup with most affinity | |
541 */ | |
542 lpl = &cpupart->cp_lgrploads[lgrpid]; | |
543 if (affs[lgrpid] > best_aff) { | |
2685 | 544 best_aff = affs[lgrpid]; |
0 | 545 best_lpl = lpl; |
546 } | |
547 | |
548 if (++lgrpid > lgrp_alloc_max) | |
549 lgrpid = 0; /* wrap the search */ | |
550 | |
2988
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
551 } while (lgrpid != finish); |
0 | 552 |
553 /* | |
554 * No lgroup (in this pset) with any affinity | |
555 */ | |
556 if (best_aff == LGRP_AFF_NONE) | |
557 return (NULL); | |
558 | |
559 lgrpid = best_lpl->lpl_lgrpid; | |
560 ASSERT(LGRP_CPUS_IN_PART(lgrpid, cpupart) && best_lpl->lpl_ncpu > 0); | |
561 | |
562 return (best_lpl); | |
563 } | |
564 | |
565 | |
566 /* | |
567 * Set thread's affinity for given lgroup | |
568 */ | |
569 int | |
570 lgrp_affinity_set_thread(kthread_t *t, lgrp_id_t lgrp, lgrp_affinity_t aff, | |
571 lgrp_affinity_t **aff_buf) | |
572 { | |
2685 | 573 lgrp_affinity_t *affs; |
574 lgrp_id_t best; | |
0 | 575 lpl_t *best_lpl; |
576 lgrp_id_t home; | |
577 int retval; | |
578 | |
579 ASSERT(t != NULL); | |
580 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); | |
581 | |
582 retval = 0; | |
583 | |
584 thread_lock(t); | |
585 | |
586 /* | |
587 * Check to see whether caller has permission to set affinity for | |
588 * thread | |
589 */ | |
7873
e3677dd00778
Mediate some process operations
Stephen Smalley <sds@tycho.nsa.gov>
parents:
2988
diff
changeset
|
590 if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED(), |
7887
f9ded24b041a
[fmac-discuss] [PATCH] Fix more cstyle issues
Stephen Smalley <sds@tycho.nsa.gov>
parents:
7873
diff
changeset
|
591 PROCESS__SETSCHED)) { |
0 | 592 thread_unlock(t); |
593 return (set_errno(EPERM)); | |
594 } | |
595 | |
596 if (t->t_lgrp_affinity == NULL) { | |
597 if (aff == LGRP_AFF_NONE) { | |
598 thread_unlock(t); | |
599 return (0); | |
600 } | |
601 ASSERT(aff_buf != NULL && *aff_buf != NULL); | |
602 t->t_lgrp_affinity = *aff_buf; | |
603 *aff_buf = NULL; | |
604 } | |
605 | |
2685 | 606 affs = t->t_lgrp_affinity; |
607 affs[lgrp] = aff; | |
0 | 608 |
609 /* | |
610 * Find lgroup for which thread has most affinity, | |
2685 | 611 * starting with lgroup for which affinity being set |
612 */ | |
2988
5c29819d6612
6469235 s10u3_06;Panic lgrp->lgrp_parent->lgrp_id == lpl->lpl_parent->lpl_lgrpid, file: ../../common/os/lgrp
jjc
parents:
2685
diff
changeset
|
613 best_lpl = lgrp_affinity_best(t, t->t_cpupart, lgrp, B_TRUE); |
2685 | 614 |
615 /* | |
616 * Rehome if found lgroup with more affinity than home or lgroup for | |
617 * which affinity is being set has same affinity as home | |
0 | 618 */ |
619 home = t->t_lpl->lpl_lgrpid; | |
2685 | 620 if (best_lpl != NULL && best_lpl != t->t_lpl) { |
621 best = best_lpl->lpl_lgrpid; | |
622 if (affs[best] > affs[home] || (affs[best] == affs[home] && | |
623 best == lgrp)) | |
624 lgrp_move_thread(t, best_lpl, 1); | |
625 } | |
0 | 626 |
627 thread_unlock(t); | |
628 | |
629 return (retval); | |
630 } | |
631 | |
632 | |
633 /* | |
634 * Set process' affinity for specified lgroup | |
635 */ | |
636 int | |
637 lgrp_affinity_set_proc(proc_t *p, lgrp_id_t lgrp, lgrp_affinity_t aff, | |
638 lgrp_affinity_t **aff_buf_array) | |
639 { | |
640 lgrp_affinity_t *buf; | |
641 int err = 0; | |
642 int i; | |
643 int retval; | |
644 kthread_t *t; | |
645 | |
646 ASSERT(MUTEX_HELD(&pidlock) && MUTEX_HELD(&p->p_lock)); | |
647 ASSERT(aff_buf_array != NULL); | |
648 | |
649 i = 0; | |
650 t = p->p_tlist; | |
651 if (t != NULL) { | |
652 do { | |
653 /* | |
654 * Set lgroup affinity for thread | |
655 */ | |
656 buf = aff_buf_array[i]; | |
657 retval = lgrp_affinity_set_thread(t, lgrp, aff, &buf); | |
658 | |
659 if (err == 0 && retval != 0) | |
660 err = retval; | |
661 | |
662 /* | |
663 * Advance pointer to next buffer | |
664 */ | |
665 if (buf == NULL) { | |
666 ASSERT(i < p->p_lwpcnt); | |
667 aff_buf_array[i] = NULL; | |
668 i++; | |
669 } | |
670 | |
671 } while ((t = t->t_forw) != p->p_tlist); | |
672 } | |
673 return (err); | |
674 } | |
675 | |
676 | |
677 /* | |
678 * Set LWP's or process' affinity for specified lgroup | |
679 * | |
680 * When setting affinities, pidlock, process p_lock, and thread_lock() | |
681 * need to be held in that order to protect target thread's pset, process, | |
682 * process contents, and thread contents. thread_lock() does splhigh(), | |
 683 * so it ends up having similar effect as kpreempt_disable(), so it will | |
684 * protect calls to lgrp_move_thread() and lgrp_choose() from pset changes. | |
685 */ | |
686 int | |
687 lgrp_affinity_set(lgrp_affinity_args_t *ap) | |
688 { | |
689 lgrp_affinity_t aff; | |
690 lgrp_affinity_t *aff_buf; | |
691 lgrp_affinity_args_t args; | |
692 id_t id; | |
693 idtype_t idtype; | |
694 lgrp_id_t lgrp; | |
695 int nthreads; | |
696 proc_t *p; | |
697 int retval; | |
698 | |
699 /* | |
700 * Copyin arguments | |
701 */ | |
702 if (copyin(ap, &args, sizeof (lgrp_affinity_args_t)) != 0) | |
703 return (set_errno(EFAULT)); | |
704 | |
705 idtype = args.idtype; | |
706 id = args.id; | |
707 lgrp = args.lgrp; | |
708 aff = args.aff; | |
709 | |
710 /* | |
711 * Check for invalid lgroup | |
712 */ | |
713 if (lgrp < 0 || lgrp == LGRP_NONE) | |
714 return (set_errno(EINVAL)); | |
715 | |
716 /* | |
717 * Check for existing lgroup | |
718 */ | |
719 if (lgrp > lgrp_alloc_max) | |
720 return (set_errno(ESRCH)); | |
721 | |
722 /* | |
723 * Check for legal affinity | |
724 */ | |
725 if (aff != LGRP_AFF_NONE && aff != LGRP_AFF_WEAK && | |
726 aff != LGRP_AFF_STRONG) | |
727 return (set_errno(EINVAL)); | |
728 | |
729 /* | |
730 * Must be process or LWP ID | |
731 */ | |
732 if (idtype != P_LWPID && idtype != P_PID) | |
733 return (set_errno(EINVAL)); | |
734 | |
735 /* | |
736 * Set given LWP's or process' affinity for specified lgroup | |
737 */ | |
738 switch (idtype) { | |
739 | |
740 case P_LWPID: | |
741 /* | |
742 * Allocate memory for thread's lgroup affinities | |
743 * ahead of time w/o holding locks | |
744 */ | |
745 aff_buf = kmem_zalloc(nlgrpsmax * sizeof (lgrp_affinity_t), | |
746 KM_SLEEP); | |
747 | |
748 p = curproc; | |
749 | |
750 /* | |
751 * Set affinity for thread | |
752 */ | |
753 mutex_enter(&p->p_lock); | |
754 if (id == P_MYID) { /* current thread */ | |
755 retval = lgrp_affinity_set_thread(curthread, lgrp, aff, | |
756 &aff_buf); | |
757 } else if (p->p_tlist == NULL) { | |
758 retval = set_errno(ESRCH); | |
759 } else { /* other thread */ | |
760 int found = 0; | |
761 kthread_t *t; | |
762 | |
763 t = p->p_tlist; | |
764 do { | |
765 if (t->t_tid == id) { | |
766 retval = lgrp_affinity_set_thread(t, | |
767 lgrp, aff, &aff_buf); | |
768 found = 1; | |
769 break; | |
770 } | |
771 } while ((t = t->t_forw) != p->p_tlist); | |
772 if (!found) | |
773 retval = set_errno(ESRCH); | |
774 } | |
775 mutex_exit(&p->p_lock); | |
776 | |
777 /* | |
778 * Free memory for lgroup affinities, | |
779 * since thread didn't need it | |
780 */ | |
781 if (aff_buf) | |
782 kmem_free(aff_buf, | |
783 nlgrpsmax * sizeof (lgrp_affinity_t)); | |
784 | |
785 break; | |
786 | |
787 case P_PID: | |
788 | |
789 do { | |
790 lgrp_affinity_t **aff_buf_array; | |
791 int i; | |
792 size_t size; | |
793 | |
794 /* | |
795 * Get process | |
796 */ | |
797 mutex_enter(&pidlock); | |
798 | |
799 if (id == P_MYID) | |
800 p = curproc; | |
801 else | |
802 p = prfind(id); | |
803 | |
804 if (p == NULL) { | |
805 mutex_exit(&pidlock); | |
806 return (set_errno(ESRCH)); | |
807 } | |
808 | |
809 /* | |
810 * Get number of threads in process | |
811 * | |
812 * NOTE: Only care about user processes, | |
813 * so p_lwpcnt should be number of threads. | |
814 */ | |
815 mutex_enter(&p->p_lock); | |
816 nthreads = p->p_lwpcnt; | |
817 mutex_exit(&p->p_lock); | |
818 | |
819 mutex_exit(&pidlock); | |
820 | |
821 if (nthreads < 1) | |
822 return (set_errno(ESRCH)); | |
823 | |
824 /* | |
825 * Preallocate memory for lgroup affinities for | |
826 * each thread in process now to avoid holding | |
827 * any locks. Allocate an array to hold a buffer | |
828 * for each thread. | |
829 */ | |
830 aff_buf_array = kmem_zalloc(nthreads * | |
831 sizeof (lgrp_affinity_t *), KM_SLEEP); | |
832 | |
833 size = nlgrpsmax * sizeof (lgrp_affinity_t); | |
834 for (i = 0; i < nthreads; i++) | |
835 aff_buf_array[i] = kmem_zalloc(size, KM_SLEEP); | |
836 | |
837 mutex_enter(&pidlock); | |
838 | |
839 /* | |
840 * Get process again since dropped locks to allocate | |
841 * memory (except current process) | |
842 */ | |
843 if (id != P_MYID) | |
844 p = prfind(id); | |
845 | |
846 /* | |
847 * Process went away after we dropped locks and before | |
848 * reacquiring them, so drop locks, free memory, and | |
849 * return. | |
850 */ | |
851 if (p == NULL) { | |
852 mutex_exit(&pidlock); | |
853 for (i = 0; i < nthreads; i++) | |
854 kmem_free(aff_buf_array[i], size); | |
855 kmem_free(aff_buf_array, | |
856 nthreads * sizeof (lgrp_affinity_t *)); | |
857 return (set_errno(ESRCH)); | |
858 } | |
859 | |
860 mutex_enter(&p->p_lock); | |
861 | |
862 /* | |
863 * See whether number of threads is same | |
864 * If not, drop locks, free memory, and try again | |
865 */ | |
866 if (nthreads != p->p_lwpcnt) { | |
867 mutex_exit(&p->p_lock); | |
868 mutex_exit(&pidlock); | |
869 for (i = 0; i < nthreads; i++) | |
870 kmem_free(aff_buf_array[i], size); | |
871 kmem_free(aff_buf_array, | |
872 nthreads * sizeof (lgrp_affinity_t *)); | |
873 continue; | |
874 } | |
875 | |
876 /* | |
877 * Set lgroup affinity for threads in process | |
878 */ | |
879 retval = lgrp_affinity_set_proc(p, lgrp, aff, | |
880 aff_buf_array); | |
881 | |
882 mutex_exit(&p->p_lock); | |
883 mutex_exit(&pidlock); | |
884 | |
885 /* | |
886 * Free any leftover memory, since some threads may | |
887 * have already allocated memory and set lgroup | |
888 * affinities before | |
889 */ | |
890 for (i = 0; i < nthreads; i++) | |
891 if (aff_buf_array[i] != NULL) | |
892 kmem_free(aff_buf_array[i], size); | |
893 kmem_free(aff_buf_array, | |
894 nthreads * sizeof (lgrp_affinity_t *)); | |
895 | |
896 break; | |
897 | |
898 } while (nthreads != p->p_lwpcnt); | |
899 | |
900 break; | |
901 | |
902 default: | |
903 retval = set_errno(EINVAL); | |
904 break; | |
905 } | |
906 | |
907 return (retval); | |
908 } | |
909 | |
910 | |
911 /* | |
912 * Return the latest generation number for the lgroup hierarchy | |
913 * with the given view | |
914 */ | |
915 lgrp_gen_t | |
916 lgrp_generation(lgrp_view_t view) | |
917 { | |
918 cpupart_t *cpupart; | |
919 uint_t gen; | |
920 | |
921 kpreempt_disable(); | |
922 | |
923 /* | |
924 * Determine generation number for given view | |
925 */ | |
926 if (view == LGRP_VIEW_OS) | |
927 /* | |
928 * Return generation number of lgroup hierarchy for OS view | |
929 */ | |
930 gen = lgrp_gen; | |
931 else { | |
932 /* | |
933 * For caller's view, use generation numbers for lgroup | |
934 * hierarchy and caller's pset | |
935 * NOTE: Caller needs to check for change in pset ID | |
936 */ | |
937 cpupart = curthread->t_cpupart; | |
938 ASSERT(cpupart); | |
939 gen = lgrp_gen + cpupart->cp_gen; | |
940 } | |
941 | |
942 kpreempt_enable(); | |
943 | |
944 return (gen); | |
945 } | |
946 | |
947 | |
/*
 * Return the home lgroup ID of the given thread.
 *
 * The caller must hold p_lock of the thread's process; the thread lock
 * is acquired and released here.  Fails with EPERM if the thread has no
 * scheduling class yet (t_cid == 0) or the caller does not pass the
 * hasprocperm() credential/FMAC check for PROCESS__SETSCHED.
 */
lgrp_id_t
lgrp_home_thread(kthread_t *t)
{
	lgrp_id_t	home;

	ASSERT(t != NULL);
	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));

	thread_lock(t);

	/*
	 * Check to see whether caller has permission to query the home
	 * lgroup of this thread.  This intentionally uses the same
	 * PROCESS__SETSCHED permission as the lgroup affinity-setting
	 * paths.
	 */
	if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED(),
	    PROCESS__SETSCHED)) {
		thread_unlock(t);
		return (set_errno(EPERM));
	}

	home = lgrp_home_id(t);

	thread_unlock(t);
	return (home);
}
973 | |
974 | |
/*
 * Get home lgroup of given process or thread
 *
 * idtype selects P_LWPID (thread in the current process) or P_PID
 * (first thread of the given process); id may be P_MYID for the
 * caller's own thread/process.  Returns the lgroup ID or sets errno
 * (ESRCH if the target doesn't exist, EINVAL for a bad idtype).
 */
lgrp_id_t
lgrp_home_get(idtype_t idtype, id_t id)
{
	proc_t		*p;
	lgrp_id_t	retval;
	kthread_t	*t;

	/*
	 * Get home lgroup of given LWP or process
	 */
	switch (idtype) {

	case P_LWPID:
		p = curproc;

		/*
		 * Get home lgroup of thread
		 */
		mutex_enter(&p->p_lock);
		if (id == P_MYID) {	/* current thread */
			retval = lgrp_home_thread(curthread);
		} else if (p->p_tlist == NULL) {
			retval = set_errno(ESRCH);
		} else {		/* other thread */
			int found = 0;

			/*
			 * Walk the circular p_tlist looking for the LWP
			 * with the requested thread ID.
			 */
			t = p->p_tlist;
			do {
				if (t->t_tid == id) {
					retval = lgrp_home_thread(t);
					found = 1;
					break;
				}
			} while ((t = t->t_forw) != p->p_tlist);
			if (!found)
				retval = set_errno(ESRCH);
		}
		mutex_exit(&p->p_lock);
		break;

	case P_PID:
		/*
		 * Get process
		 */
		mutex_enter(&pidlock);

		if (id == P_MYID)
			p = curproc;
		else
			p = prfind(id);

		if (p == NULL) {
			mutex_exit(&pidlock);
			return (set_errno(ESRCH));
		}

		/*
		 * Report the home lgroup of the process's first thread.
		 */
		mutex_enter(&p->p_lock);
		t = p->p_tlist;
		if (t == NULL)
			retval = set_errno(ESRCH);
		else
			retval = lgrp_home_thread(t);
		mutex_exit(&p->p_lock);

		mutex_exit(&pidlock);

		break;

	default:
		retval = set_errno(EINVAL);
		break;
	}

	return (retval);
}
1053 | |
1054 | |
1055 /* | |
1056 * Return latency between "from" and "to" lgroups | |
1057 * | |
1058 * This latency number can only be used for relative comparison | |
1059 * between lgroups on the running system, cannot be used across platforms, | |
1060 * and may not reflect the actual latency. It is platform and implementation | |
1061 * specific, so platform gets to decide its value. It would be nice if the | |
1062 * number was at least proportional to make comparisons more meaningful though. | |
1063 */ | |
1064 int | |
1065 lgrp_latency(lgrp_id_t from, lgrp_id_t to) | |
1066 { | |
1067 lgrp_t *from_lgrp; | |
1068 int i; | |
1069 int latency; | |
1070 int latency_max; | |
1071 lgrp_t *to_lgrp; | |
1072 | |
1073 ASSERT(MUTEX_HELD(&cpu_lock)); | |
1074 | |
1075 if (from < 0 || to < 0) | |
1076 return (set_errno(EINVAL)); | |
1077 | |
1078 if (from > lgrp_alloc_max || to > lgrp_alloc_max) | |
1079 return (set_errno(ESRCH)); | |
1080 | |
1081 from_lgrp = lgrp_table[from]; | |
1082 to_lgrp = lgrp_table[to]; | |
1083 | |
1084 if (!LGRP_EXISTS(from_lgrp) || !LGRP_EXISTS(to_lgrp)) { | |
1085 return (set_errno(ESRCH)); | |
1086 } | |
1087 | |
1088 /* | |
1089 * Get latency for same lgroup | |
1090 */ | |
1091 if (from == to) { | |
1092 latency = from_lgrp->lgrp_latency; | |
1093 return (latency); | |
1094 } | |
1095 | |
1096 /* | |
1097 * Get latency between leaf lgroups | |
1098 */ | |
1099 if (from_lgrp->lgrp_childcnt == 0 && to_lgrp->lgrp_childcnt == 0) | |
1100 return (lgrp_plat_latency(from_lgrp->lgrp_plathand, | |
1101 to_lgrp->lgrp_plathand)); | |
1102 | |
1103 /* | |
1104 * Determine max latency between resources in two lgroups | |
1105 */ | |
1106 latency_max = 0; | |
1107 for (i = 0; i <= lgrp_alloc_max; i++) { | |
1108 lgrp_t *from_rsrc; | |
1109 int j; | |
1110 lgrp_t *to_rsrc; | |
1111 | |
1112 from_rsrc = lgrp_table[i]; | |
1113 if (!LGRP_EXISTS(from_rsrc) || | |
1114 !klgrpset_ismember(from_lgrp->lgrp_set[LGRP_RSRC_CPU], i)) | |
1115 continue; | |
1116 | |
1117 for (j = 0; j <= lgrp_alloc_max; j++) { | |
1118 to_rsrc = lgrp_table[j]; | |
1119 if (!LGRP_EXISTS(to_rsrc) || | |
1120 klgrpset_ismember(to_lgrp->lgrp_set[LGRP_RSRC_MEM], | |
1121 j) == 0) | |
1122 continue; | |
1123 latency = lgrp_plat_latency(from_rsrc->lgrp_plathand, | |
1124 to_rsrc->lgrp_plathand); | |
1125 if (latency > latency_max) | |
1126 latency_max = latency; | |
1127 } | |
1128 } | |
1129 return (latency_max); | |
1130 } | |
1131 | |
1132 | |
1133 /* | |
1134 * Return lgroup interface version number | |
1135 * 0 - none | |
1136 * 1 - original | |
1137 * 2 - lgrp_latency_cookie() and lgrp_resources() added | |
1138 */ | |
1139 int | |
1140 lgrp_version(int version) | |
1141 { | |
1142 /* | |
1143 * Return LGRP_VER_NONE when requested version isn't supported | |
1144 */ | |
1145 if (version < LGRP_VER_NONE || version > LGRP_VER_CURRENT) | |
1146 return (LGRP_VER_NONE); | |
1147 | |
1148 /* | |
1149 * Return current version when LGRP_VER_NONE passed in | |
1150 */ | |
1151 if (version == LGRP_VER_NONE) | |
1152 return (LGRP_VER_CURRENT); | |
1153 | |
1154 /* | |
1155 * Otherwise, return supported version. | |
1156 */ | |
1157 return (version); | |
1158 } | |
1159 | |
1160 | |
1161 /* | |
 * Snapshot of lgroup hierarchy
1163 * | |
1164 * One snapshot is kept and is based on the kernel's native data model, so | |
1165 * a 32-bit snapshot is kept for the 32-bit kernel and a 64-bit one for the | |
1166 * 64-bit kernel. If a 32-bit user wants a snapshot from the 64-bit kernel, | |
1167 * the kernel generates a 32-bit snapshot from the data in its 64-bit snapshot. | |
1168 * | |
1169 * The format is defined by lgroup snapshot header and the layout of | |
1170 * the snapshot in memory is as follows: | |
1171 * 1) lgroup snapshot header | |
1172 * - specifies format of snapshot | |
1173 * - defined by lgrp_snapshot_header_t | |
1174 * 2) lgroup info array | |
1175 * - contains information about each lgroup | |
1176 * - one element for each lgroup | |
1177 * - each element is defined by lgrp_info_t | |
1178 * 3) lgroup CPU ID array | |
1179 * - contains list (array) of CPU IDs for each lgroup | |
1180 * - lgrp_info_t points into array and specifies how many CPUs belong to | |
1181 * given lgroup | |
1182 * 4) lgroup parents array | |
1183 * - contains lgroup bitmask of parents for each lgroup | |
1184 * - bitmask is an array of unsigned longs and its size depends on nlgrpsmax | |
1185 * 5) lgroup children array | |
1186 * - contains lgroup bitmask of children for each lgroup | |
1187 * - bitmask is an array of unsigned longs and its size depends on nlgrpsmax | |
1188 * 6) lgroup resources array | |
1189 * - contains lgroup bitmask of resources for each lgroup | |
1190 * - bitmask is an array of unsigned longs and its size depends on nlgrpsmax | |
1191 * 7) lgroup latency table | |
1192 * - contains latency from each lgroup to each of other lgroups | |
1193 * | |
1194 * NOTE: Must use nlgrpsmax for per lgroup data structures because lgroups | |
1195 * may be sparsely allocated. | |
1196 */ | |
1197 lgrp_snapshot_header_t *lgrp_snap = NULL; /* lgroup snapshot */ | |
1198 static kmutex_t lgrp_snap_lock; /* snapshot lock */ | |
1199 | |
1200 | |
/*
 * Take a snapshot of lgroup hierarchy and return size of buffer
 * needed to hold snapshot
 *
 * The single kernel-wide snapshot (lgrp_snap) is refreshed if its
 * generation number is stale.  On success, returns the snapshot size
 * in the kernel's native data model (or the equivalent 32-bit size for
 * an ILP32 caller); on allocation failure sets ENOMEM.
 *
 * Locking: callers serialize on lgrp_snap_lock (not visible here);
 * cpu_lock is taken while the hierarchy is read so it cannot change
 * underneath the snapshot.
 */
static int
lgrp_snapshot(void)
{
	size_t		bitmask_size;
	size_t		bitmasks_size;
	size_t		bufsize;
	int		cpu_index;
	size_t		cpuids_size;
	int		i;
	int		j;
	size_t		info_size;
	size_t		lats_size;
	ulong_t		*lgrp_children;
	processorid_t	*lgrp_cpuids;
	lgrp_info_t	*lgrp_info;
	int		**lgrp_lats;
	ulong_t		*lgrp_parents;
	ulong_t		*lgrp_rsets;
	ulong_t		*lgrpset;
	int		snap_ncpus;
	int		snap_nlgrps;
	int		snap_nlgrpsmax;
	size_t		snap_hdr_size;
#ifdef	_SYSCALL32_IMPL
	model_t		model = DATAMODEL_NATIVE;

	/*
	 * Have up-to-date snapshot, so check to see whether caller is 32-bit
	 * program and need to return size of 32-bit snapshot now.
	 */
	model = get_udatamodel();
	if (model == DATAMODEL_ILP32 && lgrp_snap &&
	    lgrp_snap->ss_gen == lgrp_gen) {

		snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;

		/*
		 * Calculate size of buffer needed for 32-bit snapshot,
		 * rounding up size of each object to allow for alignment
		 * of next object in buffer.
		 */
		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
		    sizeof (caddr32_t));
		info_size =
		    P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
		    sizeof (processorid_t));
		cpuids_size =
		    P2ROUNDUP(lgrp_snap->ss_ncpus * sizeof (processorid_t),
		    sizeof (ulong_t));

		/*
		 * lgroup bitmasks needed for parents, children, and resources
		 * for each lgroup and pset lgroup set
		 *
		 * NOTE(review): this uses BT_SIZEOFMAP (native ulong_t map
		 * size) while lgrp_snapshot_copy32() sizes its bitmasks with
		 * BT_SIZEOFMAP32 — confirm the two agree for all
		 * snap_nlgrpsmax values on 64-bit kernels.
		 */
		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
		bitmasks_size = (((2 + LGRP_RSRC_COUNT) *
		    snap_nlgrpsmax) + 1) * bitmask_size;

		/*
		 * Size of latency table and buffer
		 */
		lats_size = snap_nlgrpsmax * sizeof (caddr32_t) +
		    snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int);

		bufsize = snap_hdr_size + info_size + cpuids_size +
		    bitmasks_size + lats_size;
		return (bufsize);
	}
#endif	/* _SYSCALL32_IMPL */

	/*
	 * Check whether snapshot is up-to-date
	 * Free it and take another one if not
	 */
	if (lgrp_snap) {
		if (lgrp_snap->ss_gen == lgrp_gen)
			return (lgrp_snap->ss_size);

		kmem_free(lgrp_snap, lgrp_snap->ss_size);
		lgrp_snap = NULL;
	}

	/*
	 * Allocate memory for snapshot
	 * w/o holding cpu_lock while waiting for memory
	 */
	while (lgrp_snap == NULL) {
		int	old_generation;

		/*
		 * Take snapshot of lgroup generation number
		 * and configuration size dependent information
		 * NOTE: Only count number of online CPUs,
		 * since only online CPUs appear in lgroups.
		 */
		mutex_enter(&cpu_lock);
		old_generation = lgrp_gen;
		snap_ncpus = ncpus_online;
		snap_nlgrps = nlgrps;
		snap_nlgrpsmax = nlgrpsmax;
		mutex_exit(&cpu_lock);

		/*
		 * Calculate size of buffer needed for snapshot,
		 * rounding up size of each object to allow for alignment
		 * of next object in buffer.
		 */
		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header_t),
		    sizeof (void *));
		info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info_t),
		    sizeof (processorid_t));
		cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
		    sizeof (ulong_t));
		/*
		 * lgroup bitmasks needed for pset lgroup set and parents,
		 * children, and resource sets for each lgroup
		 */
		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
		bitmasks_size = (((2 + LGRP_RSRC_COUNT) *
		    snap_nlgrpsmax) + 1) * bitmask_size;

		/*
		 * Size of latency table and buffer
		 */
		lats_size = snap_nlgrpsmax * sizeof (int *) +
		    snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int);

		bufsize = snap_hdr_size + info_size + cpuids_size +
		    bitmasks_size + lats_size;

		/*
		 * Allocate memory for buffer
		 */
		lgrp_snap = kmem_zalloc(bufsize, KM_NOSLEEP);
		if (lgrp_snap == NULL)
			return (set_errno(ENOMEM));

		/*
		 * Check whether generation number has changed
		 */
		mutex_enter(&cpu_lock);
		if (lgrp_gen == old_generation)
			break;	/* hasn't changed, so done. */

		/*
		 * Generation number changed, so free memory and try again.
		 */
		mutex_exit(&cpu_lock);
		kmem_free(lgrp_snap, bufsize);
		lgrp_snap = NULL;
	}

	/*
	 * NOTE: cpu_lock is still held here (from the break above) and is
	 * not dropped until the whole hierarchy has been recorded below.
	 *
	 * Fill in lgroup snapshot header
	 * (including pointers to tables of lgroup info, CPU IDs, and parents
	 * and children)
	 */
	lgrp_snap->ss_version = LGRP_VER_CURRENT;

	/*
	 * XXX For now, liblgrp only needs to know whether the hierarchy
	 * XXX only has one level or not
	 */
	if (snap_nlgrps == 1)
		lgrp_snap->ss_levels = 1;
	else
		lgrp_snap->ss_levels = 2;

	lgrp_snap->ss_root = LGRP_ROOTID;

	lgrp_snap->ss_nlgrps = lgrp_snap->ss_nlgrps_os = snap_nlgrps;
	lgrp_snap->ss_nlgrps_max = snap_nlgrpsmax;
	lgrp_snap->ss_ncpus = snap_ncpus;
	lgrp_snap->ss_gen = lgrp_gen;
	lgrp_snap->ss_view = LGRP_VIEW_OS;
	lgrp_snap->ss_pset = 0;		/* NOTE: caller should set if needed */
	lgrp_snap->ss_size = bufsize;
	lgrp_snap->ss_magic = (uintptr_t)lgrp_snap;

	/*
	 * Carve the buffer into its sections; each pointer is offset past
	 * the previous (rounded-up) section.
	 */
	lgrp_snap->ss_info = lgrp_info =
	    (lgrp_info_t *)((uintptr_t)lgrp_snap + snap_hdr_size);

	lgrp_snap->ss_cpuids = lgrp_cpuids =
	    (processorid_t *)((uintptr_t)lgrp_info + info_size);

	lgrp_snap->ss_lgrpset = lgrpset =
	    (ulong_t *)((uintptr_t)lgrp_cpuids + cpuids_size);

	lgrp_snap->ss_parents = lgrp_parents =
	    (ulong_t *)((uintptr_t)lgrpset + bitmask_size);

	lgrp_snap->ss_children = lgrp_children =
	    (ulong_t *)((uintptr_t)lgrp_parents + (snap_nlgrpsmax *
	    bitmask_size));

	lgrp_snap->ss_rsets = lgrp_rsets =
	    (ulong_t *)((uintptr_t)lgrp_children + (snap_nlgrpsmax *
	    bitmask_size));

	lgrp_snap->ss_latencies = lgrp_lats =
	    (int **)((uintptr_t)lgrp_rsets + (LGRP_RSRC_COUNT *
	    snap_nlgrpsmax * bitmask_size));

	/*
	 * Fill in lgroup information
	 */
	cpu_index = 0;
	for (i = 0; i < snap_nlgrpsmax; i++) {
		struct cpu	*cp;
		int		cpu_count;
		struct cpu	*head;
		int		k;
		lgrp_t		*lgrp;

		lgrp = lgrp_table[i];
		if (!LGRP_EXISTS(lgrp)) {
			/* Mark holes in the sparse lgroup table */
			bzero(&lgrp_info[i], sizeof (lgrp_info[i]));
			lgrp_info[i].info_lgrpid = LGRP_NONE;
			continue;
		}

		lgrp_info[i].info_lgrpid = i;
		lgrp_info[i].info_latency = lgrp->lgrp_latency;

		/*
		 * Fill in parents, children, and lgroup resources
		 */
		lgrp_info[i].info_parents =
		    (ulong_t *)((uintptr_t)lgrp_parents + (i * bitmask_size));

		if (lgrp->lgrp_parent)
			BT_SET(lgrp_info[i].info_parents,
			    lgrp->lgrp_parent->lgrp_id);

		lgrp_info[i].info_children =
		    (ulong_t *)((uintptr_t)lgrp_children + (i * bitmask_size));

		for (j = 0; j < snap_nlgrpsmax; j++)
			if (klgrpset_ismember(lgrp->lgrp_children, j))
				BT_SET(lgrp_info[i].info_children, j);

		lgrp_info[i].info_rset =
		    (ulong_t *)((uintptr_t)lgrp_rsets +
		    (i * LGRP_RSRC_COUNT * bitmask_size));

		/* Convert each klgrpset resource set into a flat bitmask */
		for (j = 0; j < LGRP_RSRC_COUNT; j++) {
			ulong_t	*rset;

			rset = (ulong_t *)((uintptr_t)lgrp_info[i].info_rset +
			    (j * bitmask_size));
			for (k = 0; k < snap_nlgrpsmax; k++)
				if (klgrpset_ismember(lgrp->lgrp_set[j], k))
					BT_SET(rset, k);
		}

		/*
		 * Fill in CPU IDs (walking the circular per-lgroup CPU list)
		 */
		cpu_count = 0;
		lgrp_info[i].info_cpuids = NULL;
		cp = head = lgrp->lgrp_cpu;
		if (head != NULL) {
			lgrp_info[i].info_cpuids = &lgrp_cpuids[cpu_index];
			do {
				lgrp_cpuids[cpu_index] = cp->cpu_id;
				cpu_index++;
				cpu_count++;
				cp = cp->cpu_next_lgrp;
			} while (cp != head);
		}
		ASSERT(cpu_count == lgrp->lgrp_cpucnt);
		lgrp_info[i].info_ncpus = cpu_count;

		/*
		 * Fill in memory sizes for lgroups that directly contain
		 * memory
		 */
		if (klgrpset_ismember(lgrp->lgrp_set[LGRP_RSRC_MEM], i)) {
			lgrp_info[i].info_mem_free =
			    lgrp_mem_size(i, LGRP_MEM_SIZE_FREE);
			lgrp_info[i].info_mem_install =
			    lgrp_mem_size(i, LGRP_MEM_SIZE_INSTALL);
		}

		/*
		 * Fill in latency table and buffer
		 */
		lgrp_lats[i] = (int *)((uintptr_t)lgrp_lats + snap_nlgrpsmax *
		    sizeof (int *) + i * snap_nlgrpsmax * sizeof (int));
		for (j = 0; j < snap_nlgrpsmax; j++) {
			lgrp_t	*to;

			to = lgrp_table[j];
			if (!LGRP_EXISTS(to))
				continue;
			lgrp_lats[i][j] = lgrp_latency(lgrp->lgrp_id,
			    to->lgrp_id);
		}
	}
	ASSERT(cpu_index == snap_ncpus);


	mutex_exit(&cpu_lock);

#ifdef	_SYSCALL32_IMPL
	/*
	 * Check to see whether caller is 32-bit program and need to return
	 * size of 32-bit snapshot now that snapshot has been taken/updated.
	 * May not have been able to do this earlier if snapshot was out of
	 * date or didn't exist yet.
	 */
	if (model == DATAMODEL_ILP32) {

		snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;

		/*
		 * Calculate size of buffer needed for 32-bit snapshot,
		 * rounding up size of each object to allow for alignment
		 * of next object in buffer.
		 */
		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
		    sizeof (caddr32_t));
		info_size =
		    P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
		    sizeof (processorid_t));
		cpuids_size =
		    P2ROUNDUP(lgrp_snap->ss_ncpus * sizeof (processorid_t),
		    sizeof (ulong_t));

		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
		bitmasks_size = (((2 + LGRP_RSRC_COUNT) * snap_nlgrpsmax) +
		    1) * bitmask_size;


		/*
		 * Size of latency table and buffer
		 */
		lats_size = (snap_nlgrpsmax * sizeof (caddr32_t)) +
		    (snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int));

		bufsize = snap_hdr_size + info_size + cpuids_size +
		    bitmasks_size + lats_size;
		return (bufsize);
	}
#endif	/* _SYSCALL32_IMPL */

	return (lgrp_snap->ss_size);
}
1553 | |
1554 | |
/*
 * Copy snapshot into given user buffer, fix up any pointers in buffer to point
 * into user instead of kernel address space, and return size of buffer
 * needed to hold snapshot
 *
 * Returns 0 if there is no snapshot, the required size if buf is NULL or
 * empty, EAGAIN if bufsize is too small for the current snapshot, or
 * EFAULT/ENOMEM on copyout/allocation failure.
 */
static int
lgrp_snapshot_copy(char *buf, size_t bufsize)
{
	size_t			bitmask_size;
	int			cpu_index;
	size_t			cpuids_size;
	int			i;
	size_t			info_size;
	lgrp_info_t		*lgrp_info;
	int			retval;
	size_t			snap_hdr_size;
	int			snap_ncpus;
	int			snap_nlgrpsmax;
	lgrp_snapshot_header_t	*user_snap;
	lgrp_info_t		*user_info;
	lgrp_info_t		*user_info_buffer;
	processorid_t		*user_cpuids;
	ulong_t			*user_lgrpset;
	ulong_t			*user_parents;
	ulong_t			*user_children;
	int			**user_lats;
	int			**user_lats_buffer;
	ulong_t			*user_rsets;

	if (lgrp_snap == NULL)
		return (0);

	/* Size query: no buffer given, just report how much is needed */
	if (buf == NULL || bufsize <= 0)
		return (lgrp_snap->ss_size);

	/*
	 * User needs to try getting size of buffer again
	 * because given buffer size is too small.
	 * The lgroup hierarchy may have changed after they asked for the size
	 * but before the snapshot was taken.
	 */
	if (bufsize < lgrp_snap->ss_size)
		return (set_errno(EAGAIN));

	snap_ncpus = lgrp_snap->ss_ncpus;
	snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;

	/*
	 * Fill in lgrpset now because caller may have changed psets
	 */
	kpreempt_disable();
	for (i = 0; i < snap_nlgrpsmax; i++) {
		if (klgrpset_ismember(curthread->t_cpupart->cp_lgrpset,
		    i)) {
			BT_SET(lgrp_snap->ss_lgrpset, i);
		}
	}
	kpreempt_enable();

	/*
	 * Copy lgroup snapshot (snapshot header, lgroup info, and CPU IDs)
	 * into user buffer all at once
	 */
	if (copyout(lgrp_snap, buf, lgrp_snap->ss_size) != 0)
		return (set_errno(EFAULT));

	/*
	 * Round up sizes of lgroup snapshot header and info for alignment
	 */
	snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header_t),
	    sizeof (void *));
	info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info_t),
	    sizeof (processorid_t));
	cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
	    sizeof (ulong_t));

	bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);

	/*
	 * Calculate pointers into user buffer for lgroup snapshot header,
	 * info, and CPU IDs (mirroring the layout lgrp_snapshot() built)
	 */
	user_snap = (lgrp_snapshot_header_t *)buf;
	user_info = (lgrp_info_t *)((uintptr_t)user_snap + snap_hdr_size);
	user_cpuids = (processorid_t *)((uintptr_t)user_info + info_size);
	user_lgrpset = (ulong_t *)((uintptr_t)user_cpuids + cpuids_size);
	user_parents = (ulong_t *)((uintptr_t)user_lgrpset + bitmask_size);
	user_children = (ulong_t *)((uintptr_t)user_parents +
	    (snap_nlgrpsmax * bitmask_size));
	user_rsets = (ulong_t *)((uintptr_t)user_children +
	    (snap_nlgrpsmax * bitmask_size));
	user_lats = (int **)((uintptr_t)user_rsets +
	    (LGRP_RSRC_COUNT * snap_nlgrpsmax * bitmask_size));

	/*
	 * Copyout magic number (ie. pointer to beginning of buffer)
	 */
	if (copyout(&buf, &user_snap->ss_magic, sizeof (buf)) != 0)
		return (set_errno(EFAULT));

	/*
	 * Fix up pointers in user buffer to point into user buffer
	 * not kernel snapshot
	 */
	if (copyout(&user_info, &user_snap->ss_info, sizeof (user_info)) != 0)
		return (set_errno(EFAULT));

	if (copyout(&user_cpuids, &user_snap->ss_cpuids,
	    sizeof (user_cpuids)) != 0)
		return (set_errno(EFAULT));

	if (copyout(&user_lgrpset, &user_snap->ss_lgrpset,
	    sizeof (user_lgrpset)) != 0)
		return (set_errno(EFAULT));

	if (copyout(&user_parents, &user_snap->ss_parents,
	    sizeof (user_parents)) != 0)
		return (set_errno(EFAULT));

	if (copyout(&user_children, &user_snap->ss_children,
	    sizeof (user_children)) != 0)
		return (set_errno(EFAULT));

	if (copyout(&user_rsets, &user_snap->ss_rsets,
	    sizeof (user_rsets)) != 0)
		return (set_errno(EFAULT));

	if (copyout(&user_lats, &user_snap->ss_latencies,
	    sizeof (user_lats)) != 0)
		return (set_errno(EFAULT));

	/*
	 * Make copies of lgroup info and latency table, fix up pointers,
	 * and then copy them into user buffer
	 * (both temporary buffers are freed below on every path)
	 */
	user_info_buffer = kmem_zalloc(info_size, KM_NOSLEEP);
	if (user_info_buffer == NULL)
		return (set_errno(ENOMEM));

	user_lats_buffer = kmem_zalloc(snap_nlgrpsmax * sizeof (int *),
	    KM_NOSLEEP);
	if (user_lats_buffer == NULL) {
		kmem_free(user_info_buffer, info_size);
		return (set_errno(ENOMEM));
	}

	lgrp_info = (lgrp_info_t *)((uintptr_t)lgrp_snap + snap_hdr_size);
	bcopy(lgrp_info, user_info_buffer, info_size);

	cpu_index = 0;
	for (i = 0; i < snap_nlgrpsmax; i++) {
		ulong_t	*snap_rset;

		/*
		 * Skip non-existent lgroups
		 */
		if (user_info_buffer[i].info_lgrpid == LGRP_NONE)
			continue;

		/*
		 * Update free memory size since it changes frequently
		 * Only do so for lgroups directly containing memory
		 *
		 * NOTE: This must be done before changing the pointers to
		 *	 point into user space since we need to dereference
		 *	 lgroup resource set
		 */
		snap_rset = &lgrp_info[i].info_rset[LGRP_RSRC_MEM *
		    BT_BITOUL(snap_nlgrpsmax)];
		if (BT_TEST(snap_rset, i))
			user_info_buffer[i].info_mem_free =
			    lgrp_mem_size(i, LGRP_MEM_SIZE_FREE);

		/*
		 * Fix up pointers to parents, children, resources, and
		 * latencies
		 */
		user_info_buffer[i].info_parents =
		    (ulong_t *)((uintptr_t)user_parents + (i * bitmask_size));
		user_info_buffer[i].info_children =
		    (ulong_t *)((uintptr_t)user_children + (i * bitmask_size));
		user_info_buffer[i].info_rset =
		    (ulong_t *)((uintptr_t)user_rsets +
		    (i * LGRP_RSRC_COUNT * bitmask_size));
		user_lats_buffer[i] = (int *)((uintptr_t)user_lats +
		    (snap_nlgrpsmax * sizeof (int *)) + (i * snap_nlgrpsmax *
		    sizeof (int)));

		/*
		 * Fix up pointer to CPU IDs
		 */
		if (user_info_buffer[i].info_ncpus == 0) {
			user_info_buffer[i].info_cpuids = NULL;
			continue;
		}
		user_info_buffer[i].info_cpuids = &user_cpuids[cpu_index];
		cpu_index += user_info_buffer[i].info_ncpus;
	}
	ASSERT(cpu_index == snap_ncpus);

	/*
	 * Copy lgroup info and latency table with pointers fixed up to point
	 * into user buffer out to user buffer now
	 */
	retval = lgrp_snap->ss_size;
	if (copyout(user_info_buffer, user_info, info_size) != 0)
		retval = set_errno(EFAULT);
	kmem_free(user_info_buffer, info_size);

	if (copyout(user_lats_buffer, user_lats, snap_nlgrpsmax *
	    sizeof (int *)) != 0)
		retval = set_errno(EFAULT);
	kmem_free(user_lats_buffer, snap_nlgrpsmax * sizeof (int *));

	return (retval);
}
1771 | |
1772 | |
1773 #ifdef _SYSCALL32_IMPL | |
1774 /* | |
1775 * Make 32-bit copy of snapshot, fix up any pointers in buffer to point | |
1776 * into user instead of kernel address space, copy 32-bit snapshot into | |
1777 * given user buffer, and return size of buffer needed to hold snapshot | |
1778 */ | |
1779 static int | |
1780 lgrp_snapshot_copy32(caddr32_t buf, size32_t bufsize) | |
1781 { | |
1782 size32_t bitmask_size; | |
1783 size32_t bitmasks_size; | |
1784 size32_t children_size; | |
1785 int cpu_index; | |
1786 size32_t cpuids_size; | |
1787 int i; | |
1788 int j; | |
1789 size32_t info_size; | |
1790 size32_t lats_size; | |
1791 lgrp_info_t *lgrp_info; | |
1792 lgrp_snapshot_header32_t *lgrp_snap32; | |
1793 lgrp_info32_t *lgrp_info32; | |
1794 processorid_t *lgrp_cpuids32; | |
1795 caddr32_t *lgrp_lats32; | |
1796 int **lgrp_lats32_kernel; | |
1797 uint_t *lgrp_set32; | |
1798 uint_t *lgrp_parents32; | |
1799 uint_t *lgrp_children32; | |
1800 uint_t *lgrp_rsets32; | |
1801 size32_t parents_size; | |
1802 size32_t rsets_size; | |
1803 size32_t set_size; | |
1804 size32_t snap_hdr_size; | |
1805 int snap_ncpus; | |
1806 int snap_nlgrpsmax; | |
1807 size32_t snap_size; | |
1808 | |
1809 if (lgrp_snap == NULL) | |
1810 return (0); | |
1811 | |
1812 snap_ncpus = lgrp_snap->ss_ncpus; | |
1813 snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max; | |
1814 | |
1815 /* | |
1816 * Calculate size of buffer needed for 32-bit snapshot, | |
1817 * rounding up size of each object to allow for alignment | |
1818 * of next object in buffer. | |
1819 */ | |
1820 snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t), | |
1821 sizeof (caddr32_t)); | |
1822 info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t), | |
1823 sizeof (processorid_t)); | |
1824 cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t), | |
1825 sizeof (ulong_t)); | |
1826 | |
1827 bitmask_size = BT_SIZEOFMAP32(snap_nlgrpsmax); | |
1828 | |
1829 set_size = bitmask_size; | |
1830 parents_size = snap_nlgrpsmax * bitmask_size; | |
1831 children_size = snap_nlgrpsmax * bitmask_size; | |
1832 rsets_size = P2ROUNDUP(LGRP_RSRC_COUNT * snap_nlgrpsmax * | |
1833 (int)bitmask_size, sizeof (caddr32_t)); | |
1834 | |
1835 bitmasks_size = set_size + parents_size + children_size + rsets_size; | |
1836 | |
1837 /* | |
1838 * Size of latency table and buffer | |
1839 */ | |
1840 lats_size = (snap_nlgrpsmax * sizeof (caddr32_t)) + | |
1841 (snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int)); | |
1842 | |
1843 snap_size = snap_hdr_size + info_size + cpuids_size + bitmasks_size + | |
1844 lats_size; | |
1845 | |
1846 if (buf == NULL || bufsize <= 0) { | |
1847 return (snap_size); | |
1848 } | |
1849 | |
1850 /* | |
1851 * User needs to try getting size of buffer again | |
1852 * because given buffer size is too small. | |
1853 * The lgroup hierarchy may have changed after they asked for the size | |
1854 * but before the snapshot was taken. | |
1855 */ | |
1856 if (bufsize < snap_size) | |
1857 return (set_errno(EAGAIN)); | |
1858 | |
1859 /* | |
1860 * Make 32-bit copy of snapshot, fix up pointers to point into user | |
1861 * buffer not kernel, and then copy whole thing into user buffer | |
1862 */ | |
1863 lgrp_snap32 = kmem_zalloc(snap_size, KM_NOSLEEP); | |
1864 if (lgrp_snap32 == NULL) | |
1865 return (set_errno(ENOMEM)); | |
1866 | |
1867 /* | |
1868 * Calculate pointers into 32-bit copy of snapshot | |
1869 * for lgroup info, CPU IDs, pset lgroup bitmask, parents, children, | |
1870 * resources, and latency table and buffer | |
1871 */ | |
1872 lgrp_info32 = (lgrp_info32_t *)((uintptr_t)lgrp_snap32 + | |
1873 snap_hdr_size); | |
1874 lgrp_cpuids32 = (processorid_t *)((uintptr_t)lgrp_info32 + info_size); | |
1875 lgrp_set32 = (uint_t *)((uintptr_t)lgrp_cpuids32 + cpuids_size); | |
1876 lgrp_parents32 = (uint_t *)((uintptr_t)lgrp_set32 + set_size); | |
1877 lgrp_children32 = (uint_t *)((uintptr_t)lgrp_parents32 + parents_size); | |
1878 lgrp_rsets32 = (uint_t *)((uintptr_t)lgrp_children32 + children_size); | |
1879 lgrp_lats32 = (caddr32_t *)((uintptr_t)lgrp_rsets32 + rsets_size); | |
1880 | |
1881 /* | |
1882 * Make temporary lgroup latency table of pointers for kernel to use | |
1883 * to fill in rows of table with latencies from each lgroup | |
1884 */ | |
1885 lgrp_lats32_kernel = kmem_zalloc(snap_nlgrpsmax * sizeof (int *), | |
1886 KM_NOSLEEP); | |
1887 if (lgrp_lats32_kernel == NULL) { | |
1888 kmem_free(lgrp_snap32, snap_size); | |
1889 return (set_errno(ENOMEM)); | |
1890 } | |
1891 | |
1892 /* | |
1893 * Fill in 32-bit lgroup snapshot header | |
1894 * (with pointers into user's buffer for lgroup info, CPU IDs, | |
1895 * bit masks, and latencies) | |
1896 */ | |
1897 lgrp_snap32->ss_version = lgrp_snap->ss_version; | |
1898 lgrp_snap32->ss_levels = lgrp_snap->ss_levels; | |
1899 lgrp_snap32->ss_nlgrps = lgrp_snap32->ss_nlgrps_os = | |
1900 lgrp_snap->ss_nlgrps; | |
1901 lgrp_snap32->ss_nlgrps_max = snap_nlgrpsmax; | |
1902 lgrp_snap32->ss_root = lgrp_snap->ss_root; | |
1903 lgrp_snap32->ss_ncpus = lgrp_snap->ss_ncpus; | |
1904 lgrp_snap32->ss_gen = lgrp_snap->ss_gen; | |
1905 lgrp_snap32->ss_view = LGRP_VIEW_OS; | |
1906 lgrp_snap32->ss_size = snap_size; | |
1907 lgrp_snap32->ss_magic = buf; | |
1908 lgrp_snap32->ss_info = buf + snap_hdr_size; | |
1909 lgrp_snap32->ss_cpuids = lgrp_snap32->ss_info + info_size; | |
1910 lgrp_snap32->ss_lgrpset = lgrp_snap32->ss_cpuids + cpuids_size; | |
1911 lgrp_snap32->ss_parents = lgrp_snap32->ss_lgrpset + bitmask_size; | |
1912 lgrp_snap32->ss_children = lgrp_snap32->ss_parents + | |
1913 (snap_nlgrpsmax * bitmask_size); | |
1914 lgrp_snap32->ss_rsets = lgrp_snap32->ss_children + | |
1915 (snap_nlgrpsmax * bitmask_size); | |
1916 lgrp_snap32->ss_latencies = lgrp_snap32->ss_rsets + | |
1917 (LGRP_RSRC_COUNT * snap_nlgrpsmax * bitmask_size); | |
1918 | |
1919 /* | |
1920 * Fill in lgrpset now because caller may have change psets | |
1921 */ | |
1922 kpreempt_disable(); | |
1923 for (i = 0; i < snap_nlgrpsmax; i++) { | |
1924 if (klgrpset_ismember(curthread->t_cpupart->cp_lgrpset, | |
1925 i)) { | |
1926 BT_SET32(lgrp_set32, i); | |
1927 } | |
1928 } | |
1929 kpreempt_enable(); | |
1930 | |
1931 /* | |
1932 * Fill in 32-bit copy of lgroup info and fix up pointers | |
1933 * to point into user's buffer instead of kernel's | |
1934 */ | |
1935 cpu_index = 0; | |
1936 lgrp_info = lgrp_snap->ss_info; | |
1937 for (i = 0; i < snap_nlgrpsmax; i++) { | |
1938 uint_t *children; | |
1939 uint_t *lgrp_rset; | |
1940 uint_t *parents; | |
1941 ulong_t *snap_rset; | |
1942 | |
1943 /* | |
1944 * Skip non-existent lgroups | |
1945 */ | |
1946 if (lgrp_info[i].info_lgrpid == LGRP_NONE) { | |
1947 bzero(&lgrp_info32[i], sizeof (lgrp_info32[i])); | |
1948 lgrp_info32[i].info_lgrpid = LGRP_NONE; | |
1949 continue; | |
1950 } | |
1951 | |
1952 /* | |
1953 * Fill in parents, children, lgroup resource set, and | |
1954 * latencies from snapshot | |
1955 */ | |
1956 parents = (uint_t *)((uintptr_t)lgrp_parents32 + | |
1957 i * bitmask_size); | |
1958 children = (uint_t *)((uintptr_t)lgrp_children32 + | |
1959 i * bitmask_size); | |
1960 snap_rset = (ulong_t *)((uintptr_t)lgrp_snap->ss_rsets + | |
1961 (i * LGRP_RSRC_COUNT * BT_SIZEOFMAP(snap_nlgrpsmax))); | |
1962 lgrp_rset = (uint_t *)((uintptr_t)lgrp_rsets32 + | |
1963 (i * LGRP_RSRC_COUNT * bitmask_size)); | |
1964 lgrp_lats32_kernel[i] = (int *)((uintptr_t)lgrp_lats32 + | |
1965 snap_nlgrpsmax * sizeof (caddr32_t) + i * snap_nlgrpsmax * | |
1966 sizeof (int)); | |
1967 for (j = 0; j < snap_nlgrpsmax; j++) { | |
1968 int k; | |
1969 uint_t *rset; | |
1970 | |
1971 if (BT_TEST(&lgrp_snap->ss_parents[i], j)) | |
1972 BT_SET32(parents, j); | |
1973 | |
1974 if (BT_TEST(&lgrp_snap->ss_children[i], j)) | |
1975 BT_SET32(children, j); | |
1976 | |
1977 for (k = 0; k < LGRP_RSRC_COUNT; k++) { | |
1978 rset = (uint_t *)((uintptr_t)lgrp_rset + | |
1979 k * bitmask_size); | |
1980 if (BT_TEST(&snap_rset[k], j)) | |
1981 BT_SET32(rset, j); | |
1982 } | |
1983 | |
1984 lgrp_lats32_kernel[i][j] = | |
1985 lgrp_snap->ss_latencies[i][j]; | |
1986 } | |
1987 | |
1988 /* | |
1989 * Fix up pointer to latency buffer | |
1990 */ | |
1991 lgrp_lats32[i] = lgrp_snap32->ss_latencies + | |
1992 snap_nlgrpsmax * sizeof (caddr32_t) + i * snap_nlgrpsmax * | |
1993 sizeof (int); | |
1994 | |
1995 /* | |
1996 * Fix up pointers for parents, children, and resources | |
1997 */ | |
1998 lgrp_info32[i].info_parents = lgrp_snap32->ss_parents + | |
1999 (i * bitmask_size); | |
2000 lgrp_info32[i].info_children = lgrp_snap32->ss_children + | |
2001 (i * bitmask_size); | |
2002 lgrp_info32[i].info_rset = lgrp_snap32->ss_rsets + | |
2003 (i * LGRP_RSRC_COUNT * bitmask_size); | |
2004 | |
2005 /* | |
2006 * Fill in memory and CPU info | |
2007 * Only fill in memory for lgroups directly containing memory | |
2008 */ | |
2009 snap_rset = &lgrp_info[i].info_rset[LGRP_RSRC_MEM * | |
2010 BT_BITOUL(snap_nlgrpsmax)]; | |
2011 if (BT_TEST(snap_rset, i)) { | |
2012 lgrp_info32[i].info_mem_free = lgrp_mem_size(i, | |
2013 LGRP_MEM_SIZE_FREE); | |
2014 lgrp_info32[i].info_mem_install = | |
2015 lgrp_info[i].info_mem_install; | |
2016 } | |
2017 | |
2018 lgrp_info32[i].info_ncpus = lgrp_info[i].info_ncpus; | |
2019 | |
2020 lgrp_info32[i].info_lgrpid = lgrp_info[i].info_lgrpid; | |
2021 lgrp_info32[i].info_latency = lgrp_info[i].info_latency; | |
2022 | |
2023 if (lgrp_info32[i].info_ncpus == 0) { | |
2024 lgrp_info32[i].info_cpuids = 0; | |
2025 continue; | |
2026 } | |
2027 | |
2028 /* | |
2029 * Fix up pointer for CPU IDs | |
2030 */ | |
2031 lgrp_info32[i].info_cpuids = lgrp_snap32->ss_cpuids + | |
2032 (cpu_index * sizeof (processorid_t)); | |
2033 cpu_index += lgrp_info32[i].info_ncpus; | |
2034 } | |
2035 ASSERT(cpu_index == snap_ncpus); | |
2036 | |
2037 /* | |
2038 * Copy lgroup CPU IDs into 32-bit snapshot | |
2039 * before copying it out into user's buffer | |
2040 */ | |
2041 bcopy(lgrp_snap->ss_cpuids, lgrp_cpuids32, cpuids_size); | |
2042 | |
2043 /* | |
2044 * Copy 32-bit lgroup snapshot into user's buffer all at once | |
2045 */ | |
2046 if (copyout(lgrp_snap32, (void *)(uintptr_t)buf, snap_size) != 0) { | |
2047 kmem_free(lgrp_snap32, snap_size); | |
2048 kmem_free(lgrp_lats32_kernel, snap_nlgrpsmax * sizeof (int *)); | |
2049 return (set_errno(EFAULT)); | |
2050 } | |
2051 | |
2052 kmem_free(lgrp_snap32, snap_size); | |
2053 kmem_free(lgrp_lats32_kernel, snap_nlgrpsmax * sizeof (int *)); | |
2054 | |
2055 return (snap_size); | |
2056 } | |
2057 #endif /* _SYSCALL32_IMPL */ | |
2058 | |
2059 | |
2060 int | |
2061 lgrpsys(int subcode, long ia, void *ap) | |
2062 { | |
2063 size_t bufsize; | |
2064 int latency; | |
2065 | |
2066 switch (subcode) { | |
2067 | |
2068 case LGRP_SYS_AFFINITY_GET: | |
2069 return (lgrp_affinity_get((lgrp_affinity_args_t *)ap)); | |
2070 | |
2071 case LGRP_SYS_AFFINITY_SET: | |
2072 return (lgrp_affinity_set((lgrp_affinity_args_t *)ap)); | |
2073 | |
2074 case LGRP_SYS_GENERATION: | |
2075 return (lgrp_generation(ia)); | |
2076 | |
2077 case LGRP_SYS_HOME: | |
2078 return (lgrp_home_get((idtype_t)ia, (id_t)(uintptr_t)ap)); | |
2079 | |
2080 case LGRP_SYS_LATENCY: | |
2081 mutex_enter(&cpu_lock); | |
2082 latency = lgrp_latency(ia, (lgrp_id_t)(uintptr_t)ap); | |
2083 mutex_exit(&cpu_lock); | |
2084 return (latency); | |
2085 | |
2086 case LGRP_SYS_MEMINFO: | |
2087 return (meminfo(ia, (struct meminfo *)ap)); | |
2088 | |
2089 case LGRP_SYS_VERSION: | |
2090 return (lgrp_version(ia)); | |
2091 | |
2092 case LGRP_SYS_SNAPSHOT: | |
2093 mutex_enter(&lgrp_snap_lock); | |
2094 bufsize = lgrp_snapshot(); | |
2095 if (ap && ia > 0) { | |
2096 if (get_udatamodel() == DATAMODEL_NATIVE) | |
2097 bufsize = lgrp_snapshot_copy(ap, ia); | |
2098 #ifdef _SYSCALL32_IMPL | |
2099 else | |
2100 bufsize = lgrp_snapshot_copy32( | |
2101 (caddr32_t)(uintptr_t)ap, ia); | |
2102 #endif /* _SYSCALL32_IMPL */ | |
2103 } | |
2104 mutex_exit(&lgrp_snap_lock); | |
2105 return (bufsize); | |
2106 | |
2107 default: | |
2108 break; | |
2109 | |
2110 } | |
2111 | |
2112 return (set_errno(EINVAL)); | |
2113 } |