Mercurial > illumos > illumos-gate
changeset 2957:a3f9fceeda60
FWARC 2006/545 max-vcpu and mondo-latency MD properties
6469894 xcall timeouts should be derived from the machine description
author | jm22469 |
---|---|
date | Fri, 20 Oct 2006 16:24:25 -0700 |
parents | 1940b23c5e82 |
children | 98aa41c076f5 |
files | usr/src/uts/sun4v/io/platsvc.c usr/src/uts/sun4v/os/fillsysinfo.c usr/src/uts/sun4v/os/mach_cpu_states.c usr/src/uts/sun4v/os/mach_mp_startup.c usr/src/uts/sun4v/os/mach_mp_states.c usr/src/uts/sun4v/promif/promif_emul.c |
diffstat | 6 files changed, 186 insertions(+), 20 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/uts/sun4v/io/platsvc.c Fri Oct 20 16:03:03 2006 -0700 +++ b/usr/src/uts/sun4v/io/platsvc.c Fri Oct 20 16:24:25 2006 -0700 @@ -219,6 +219,7 @@ { extern int mach_descrip_update(void); extern void mdeg_notify_clients(void); + extern void recalc_xc_timeouts(void); ds_svc_hdl_t ds_handle; platsvc_md_update_req_t *msg = buf; @@ -246,6 +247,8 @@ return; } + recalc_xc_timeouts(); + /* * notify registered clients that MD has * been updated
--- a/usr/src/uts/sun4v/os/fillsysinfo.c Fri Oct 20 16:03:03 2006 -0700 +++ b/usr/src/uts/sun4v/os/fillsysinfo.c Fri Oct 20 16:24:25 2006 -0700 @@ -652,7 +652,7 @@ ASSERT(nrnode == 1); - (void) md_get_prop_val(mdp, platlist[0], "max-vcpus", &ncpus); + (void) md_get_prop_val(mdp, platlist[0], "max-cpus", &ncpus); max_qsize = ncpus * CPU_MONDO_Q_MULTIPLIER; md_free_scan_dag(mdp, &platlist); @@ -779,7 +779,8 @@ nrnode = md_alloc_scan_dag(mdp, rootnode, "platform", "fwd", &platlist); - ASSERT(nrnode == 1); + if (nrnode < 1) + cmn_err(CE_PANIC, "init_md_broken: platform node missing"); if (md_get_prop_data(mdp, cpulist[0], "compatible", (uint8_t **)&namebuf, &namelen)) {
--- a/usr/src/uts/sun4v/os/mach_cpu_states.c	Fri Oct 20 16:03:03 2006 -0700
+++ b/usr/src/uts/sun4v/os/mach_cpu_states.c	Fri Oct 20 16:24:25 2006 -0700
@@ -1143,3 +1143,174 @@
 out:
 	kpreempt_enable();
 }
+
+/*
+ * Recalculate the values of the cross-call timeout variables based
+ * on the value of the 'inter-cpu-latency' property of the platform node.
+ * The property sets the number of nanosec to wait for a cross-call
+ * to be acknowledged.  Other timeout variables are derived from it.
+ *
+ * The property is optional: if it is absent, or if the MD or its
+ * platform node cannot be accessed, the current timeouts are left
+ * unchanged.  The MD handle and the platform node list are released
+ * before returning.
+ *
+ * N.B. This implementation is aware of the internals of xc_init()
+ * and updates many of the same variables.
+ */
+void
+recalc_xc_timeouts(void)
+{
+	typedef union {
+		uint64_t whole;
+		struct {
+			uint_t high;
+			uint_t low;
+		} half;
+	} u_number;
+
+	/* See x_call.c for descriptions of these extern variables. */
+	extern uint64_t xc_tick_limit_scale;
+	extern uint64_t xc_mondo_time_limit;
+	extern uint64_t xc_func_time_limit;
+	extern uint64_t xc_scale;
+	extern uint64_t xc_mondo_multiplier;
+	extern uint_t nsec_shift;
+
+	/* Temp versions of the target variables */
+	uint64_t tick_limit;
+	uint64_t tick_jump_limit;
+	uint64_t mondo_time_limit;
+	uint64_t func_time_limit;
+	uint64_t scale;
+
+	uint64_t latency;	/* nanoseconds */
+	uint64_t maxfreq;
+	uint64_t tick_limit_save = xc_tick_limit;
+	uint_t tick_scale;
+	uint64_t top;
+	uint64_t bottom;
+	u_number tk;
+
+	md_t *mdp;
+	int nrnode;
+	mde_cookie_t *platlist;
+
+	/*
+	 * Look up the 'inter-cpu-latency' (optional) property in the
+	 * platform node of the MD.  The units are nanoseconds.
+	 */
+	if ((mdp = md_get_handle()) == NULL) {
+		cmn_err(CE_WARN, "recalc_xc_timeouts: "
+		    "Unable to initialize machine description");
+		return;
+	}
+
+	nrnode = md_alloc_scan_dag(mdp,
+	    md_root_node(mdp), "platform", "fwd", &platlist);
+
+	ASSERT(nrnode == 1);
+	if (nrnode < 1) {
+		cmn_err(CE_WARN, "recalc_xc_timeouts: platform node missing");
+		goto done;
+	}
+
+	/* The property is optional; without it there is nothing to do. */
+	if (md_get_prop_val(mdp, platlist[0],
+	    "inter-cpu-latency", &latency) == -1)
+		goto done;
+
+	/*
+	 * clock.h defines an assembly-language macro
+	 * (NATIVE_TIME_TO_NSEC_SCALE) to convert from %stick
+	 * units to nanoseconds.  Since the inter-cpu-latency
+	 * units are nanoseconds and the xc_* variables require
+	 * %stick units, we need the inverse of that function.
+	 * The trick is to perform the calculation without
+	 * floating point, but also without integer truncation
+	 * or overflow.  To understand the calculation below,
+	 * please read the discussion of the macro in clock.h.
+	 * Since this new code will be invoked infrequently,
+	 * we can afford to implement it in C.
+	 *
+	 * tick_scale is the reciprocal of nsec_scale which is
+	 * calculated at startup in setcpudelay().  The calc
+	 * of tick_limit parallels that of NATIVE_TIME_TO_NSEC_SCALE
+	 * except we use tick_scale instead of nsec_scale and
+	 * C instead of assembler.
+	 */
+	tick_scale = (uint_t)(((u_longlong_t)sys_tick_freq
+	    << (32 - nsec_shift)) / NANOSEC);
+
+	/*
+	 * Work on the 32-bit halves of latency separately.  The union
+	 * assumes the first member overlays the upper word, i.e. a
+	 * big-endian layout.
+	 */
+	tk.whole = latency;
+	top = ((uint64_t)tk.half.high << 4) * tick_scale;
+	bottom = (((uint64_t)tk.half.low << 4) * (uint64_t)tick_scale) >> 32;
+	tick_limit = top + bottom;
+
+	/*
+	 * xc_init() calculated 'maxfreq' by looking at all the cpus,
+	 * and used it to derive some of the timeout variables that we
+	 * recalculate below.  We can back into the original value by
+	 * using the inverse of one of those calculations.
+	 */
+	maxfreq = xc_mondo_time_limit / xc_scale;
+
+	/*
+	 * Don't allow the new timeout (xc_tick_limit) to fall below
+	 * the system tick frequency (stick).  Allowing the timeout
+	 * to be set more tightly than this empirically determined
+	 * value may cause panics.
+	 */
+	tick_limit = tick_limit < sys_tick_freq ? sys_tick_freq : tick_limit;
+
+	tick_jump_limit = tick_limit / 32;
+	tick_limit *= xc_tick_limit_scale;
+
+	/*
+	 * Recalculate xc_scale since it is used in a callback function
+	 * (xc_func_timeout_adj) to adjust two of the timeouts dynamically.
+	 * Make the change in xc_scale proportional to the change in
+	 * xc_tick_limit.
+	 */
+	scale = (xc_scale * tick_limit + sys_tick_freq / 2) / tick_limit_save;
+	if (scale == 0)
+		scale = 1;
+
+	mondo_time_limit = maxfreq * scale;
+	func_time_limit = mondo_time_limit * xc_mondo_multiplier;
+
+	/*
+	 * Don't modify the timeouts if nothing has changed.  Else,
+	 * stuff the variables with the freshly calculated (temp)
+	 * variables.  This minimizes the window where the set of
+	 * values could be inconsistent.
+	 */
+	if (tick_limit != xc_tick_limit) {
+		xc_tick_limit = tick_limit;
+		xc_tick_jump_limit = tick_jump_limit;
+		xc_scale = scale;
+		xc_mondo_time_limit = mondo_time_limit;
+		xc_func_time_limit = func_time_limit;
+		/*
+		 * Force the new values to be used for future cross
+		 * calls.  This is necessary only when we increase
+		 * the timeouts.
+		 */
+		if (tick_limit > tick_limit_save) {
+			cpuset_t cpuset = cpu_ready_set;
+
+			xt_sync(cpuset);
+		}
+	}
+
+done:
+	/* Release the platform node list (if allocated) and the MD handle. */
+	if (nrnode > 0)
+		md_free_scan_dag(mdp, &platlist);
+	(void) md_fini_handle(mdp);
+}
--- a/usr/src/uts/sun4v/os/mach_mp_startup.c	Fri Oct 20 16:03:03 2006 -0700
+++ b/usr/src/uts/sun4v/os/mach_mp_startup.c	Fri Oct 20 16:24:25 2006 -0700
@@ -32,6 +32,7 @@
 #include <sys/cpu_sgnblk_defs.h>
 #include <sys/mdesc.h>
 #include <sys/mach_descrip.h>
+#include <sys/ldoms.h>
 
 /*
  * Useful for disabling MP bring-up for an MP capable kernel
@@ -222,3 +223,23 @@
 
 	return (0);
 }
+
+/*
+ * Platform-specific actions to be taken when all cpus are running
+ * in the OS: recalculate the cross-call timeouts and, when domaining
+ * is enabled, set cif_cpu_mp_ready.
+ */
+void
+cpu_mp_init(void)
+{
+	extern void recalc_xc_timeouts(void);
+	extern int cif_cpu_mp_ready;
+
+	/* N.B. This must happen after xc_init() has run. */
+	recalc_xc_timeouts();
+
+	if (!(domaining_capabilities & DOMAINING_ENABLED))
+		return;
+
+	cif_cpu_mp_ready = 1;
+}
--- a/usr/src/uts/sun4v/os/mach_mp_states.c Fri Oct 20 16:03:03 2006 -0700 +++ b/usr/src/uts/sun4v/os/mach_mp_states.c Fri Oct 20 16:24:25 2006 -0700 @@ -61,8 +61,9 @@ * Stop a CPU based on its cpuid, using the cpu_stop hypervisor call. * Since this requires that the hypervisor force a remote CPU to stop, * the assumption is made that this should take roughly the same amount - * of time as a CPU mondo. Consequently, the mondo timeout is used to - * determine when to give up waiting for the CPU to stop. + * of time as executing a cross-call. Consequently, the xcall + * timeout is used to determine when to give up waiting for the CPU to + * stop. * * Attempts to stop a CPU already in the stopped or error state will * silently succeed. Zero is returned on success and a non-negative @@ -77,7 +78,7 @@ uint64_t major = 0; uint64_t minor = 0; uint64_t cpu_stop_time_limit; - extern uint64_t xc_mondo_time_limit; + extern uint64_t xc_func_time_limit; ASSERT(MUTEX_HELD(&cpu_lock)); @@ -109,7 +110,7 @@ return (ENOTSUP); /* use the mondo timeout if it has been initialized */ - cpu_stop_time_limit = xc_mondo_time_limit; + cpu_stop_time_limit = xc_func_time_limit; /* * If called early in boot before the mondo time limit
--- a/usr/src/uts/sun4v/promif/promif_emul.c Fri Oct 20 16:03:03 2006 -0700 +++ b/usr/src/uts/sun4v/promif/promif_emul.c Fri Oct 20 16:24:25 2006 -0700 @@ -312,18 +312,4 @@ promif_io_init(); } - -/* - * Platform-specific actions to be taken when all cpus are running - * in the OS. - */ -void -cpu_mp_init(void) -{ - if (!(domaining_capabilities & DOMAINING_ENABLED)) - return; - - cif_cpu_mp_ready = 1; -} - #endif /* _KMDB */