Mercurial > illumos > illumos-gate
changeset 10883:12fb4fb507d6 onnv_127
6893075 LatencyTOP hits "dtrace deadman" problem when -t > 30 sec
6889369 LatencyTOP drops data when run on a heavily-loaded and large-memoried system
Portions contributed by Lejun Zhu <lejun.zhu@intel.com>
author | Krishnendu Sadhukhan - Sun Microsystems <Krishnendu.Sadhukhan@Sun.COM> |
---|---|
date | Mon, 26 Oct 2009 22:25:39 -0700 |
parents | 640bb4bb88b6 |
children | bec3912d153a |
files | usr/src/cmd/latencytop/common/dwrapper.c usr/src/cmd/latencytop/common/latencytop.c usr/src/cmd/latencytop/common/latencytop.h |
diffstat | 3 files changed, 49 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/cmd/latencytop/common/dwrapper.c Tue Oct 27 11:32:58 2009 +0800 +++ b/usr/src/cmd/latencytop/common/dwrapper.c Mon Oct 26 22:25:39 2009 -0700 @@ -359,6 +359,7 @@ drop_handler(const dtrace_dropdata_t *data, void *user) { lt_display_error("Drop: %s\n", data->dtdda_msg); + lt_drop_detected = B_TRUE; /* Pretend nothing happened, so just continue */ return (DTRACE_HANDLE_OK); @@ -565,9 +566,18 @@ /* * dtrace clean up. */ -void +int lt_dtrace_deinit(void) { - (void) dtrace_stop(g_dtp); + int ret = 0; + + if (dtrace_stop(g_dtp) != 0) { + lt_display_error("dtrace_stop failed: %s\n", + dtrace_errmsg(g_dtp, dtrace_errno(g_dtp))); + ret = -1; + } + dtrace_close(g_dtp); + + return (ret); }
--- a/usr/src/cmd/latencytop/common/latencytop.c Tue Oct 27 11:32:58 2009 +0800 +++ b/usr/src/cmd/latencytop/common/latencytop.c Mon Oct 26 22:25:39 2009 -0700 @@ -35,6 +35,12 @@ #define CMPOPT(a, b) strncmp((a), (b), sizeof (b)) +/* + * This variable is used to check if "dynamic variable drop" in dtrace + * has happened. + */ +boolean_t lt_drop_detected = 0; + lt_config_t g_config; typedef enum { @@ -207,6 +213,7 @@ uint64_t current_time; uint64_t delta_time; char logfile[PATH_MAX] = ""; + boolean_t no_dtrace_cleanup = B_TRUE; lt_gpipe_init(); (void) signal(SIGINT, signal_handler); @@ -421,8 +428,12 @@ break; } - if (tsleep > g_config.lt_cfg_snap_interval * 1000) { - tsleep = g_config.lt_cfg_snap_interval * 1000; + /* + * Interval when we call dtrace_status() and collect + * aggregated data. + */ + if (tsleep > g_config.lt_cfg_snap_interval) { + tsleep = g_config.lt_cfg_snap_interval; } timeout.tv_sec = tsleep / 1000; @@ -458,6 +469,24 @@ running = lt_display_loop(refresh_interval * 1000 - delta_time); + + /* + * This is to avoid dynamic variable drop + * in DTrace. + */ + if (lt_drop_detected == B_TRUE) { + if (lt_dtrace_deinit() != 0) { + no_dtrace_cleanup = B_FALSE; + retval = 1; + break; + } + + lt_drop_detected = B_FALSE; + if (lt_dtrace_init() != 0) { + retval = 1; + break; + } + } } while (running != 0); lt_klog_write(); @@ -466,7 +495,9 @@ lt_display_deinit(); end_ubreak: - lt_dtrace_deinit(); + if (no_dtrace_cleanup == B_FALSE || lt_dtrace_deinit() != 0) + retval = 1; + lt_stat_free_all(); end_nodtrace:
--- a/usr/src/cmd/latencytop/common/latencytop.h Tue Oct 27 11:32:58 2009 +0800 +++ b/usr/src/cmd/latencytop/common/latencytop.h Mon Oct 26 22:25:39 2009 -0700 @@ -169,13 +169,15 @@ #define CAUSE_FLAG_SPECIAL 4 #define CAUSE_ALL_FLAGS 0xffffffff +extern boolean_t lt_drop_detected; + /* * These functions collect statistics using DTrace. */ extern int lt_dtrace_init(void); extern int lt_dtrace_work(int); extern int lt_dtrace_collect(void); -extern void lt_dtrace_deinit(void); +extern int lt_dtrace_deinit(void); /* * These functions maintain configuration, e.g. symbol to cause mapping.