changeset 10883:12fb4fb507d6 onnv_127

6893075 LatencyTOP hits "dtrace deadman" problem when -t > 30 sec
6889369 LatencyTOP drops data when run on a heavily-loaded and large-memoried system
Portions contributed by Lejun Zhu <lejun.zhu@intel.com>
author Krishnendu Sadhukhan - Sun Microsystems <Krishnendu.Sadhukhan@Sun.COM>
date Mon, 26 Oct 2009 22:25:39 -0700
parents 640bb4bb88b6
children bec3912d153a
files usr/src/cmd/latencytop/common/dwrapper.c usr/src/cmd/latencytop/common/latencytop.c usr/src/cmd/latencytop/common/latencytop.h
diffstat 3 files changed, 49 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/cmd/latencytop/common/dwrapper.c	Tue Oct 27 11:32:58 2009 +0800
+++ b/usr/src/cmd/latencytop/common/dwrapper.c	Mon Oct 26 22:25:39 2009 -0700
@@ -359,6 +359,7 @@
 drop_handler(const dtrace_dropdata_t *data, void *user)
 {
 	lt_display_error("Drop: %s\n", data->dtdda_msg);
+	lt_drop_detected = B_TRUE;
 
 	/* Pretend nothing happened, so just continue */
 	return (DTRACE_HANDLE_OK);
@@ -565,9 +566,18 @@
 /*
  * dtrace clean up.
  */
-void
+int
 lt_dtrace_deinit(void)
 {
-	(void) dtrace_stop(g_dtp);
+	int ret = 0;
+
+	if (dtrace_stop(g_dtp) != 0) {
+		lt_display_error("dtrace_stop failed: %s\n",
+		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
+		ret = -1;
+	}
+
 	dtrace_close(g_dtp);
+
+	return (ret);
 }
--- a/usr/src/cmd/latencytop/common/latencytop.c	Tue Oct 27 11:32:58 2009 +0800
+++ b/usr/src/cmd/latencytop/common/latencytop.c	Mon Oct 26 22:25:39 2009 -0700
@@ -35,6 +35,12 @@
 
 #define	CMPOPT(a, b)	strncmp((a), (b), sizeof (b))
 
+/*
+ * Set to B_TRUE by the DTrace drop handler when DTrace reports a drop
+ * (such as a "dynamic variable drop"); the loop below checks and clears it.
+ */
+boolean_t lt_drop_detected = 0;
+
 lt_config_t g_config;
 
 typedef enum {
@@ -207,6 +213,7 @@
 	uint64_t current_time;
 	uint64_t delta_time;
 	char logfile[PATH_MAX] = "";
+	boolean_t no_dtrace_cleanup = B_TRUE;
 
 	lt_gpipe_init();
 	(void) signal(SIGINT, signal_handler);
@@ -421,8 +428,12 @@
 			break;
 		}
 
-		if (tsleep > g_config.lt_cfg_snap_interval * 1000) {
-			tsleep = g_config.lt_cfg_snap_interval * 1000;
+		/*
+		 * Interval at which we call dtrace_status() and collect
+		 * aggregated data.
+		 */
+		if (tsleep > g_config.lt_cfg_snap_interval) {
+			tsleep = g_config.lt_cfg_snap_interval;
 		}
 
 		timeout.tv_sec = tsleep / 1000;
@@ -458,6 +469,24 @@
 
 		running = lt_display_loop(refresh_interval * 1000 -
 		    delta_time);
+
+		/*
+		 * A drop was detected: restart DTrace to avoid
+		 * further dynamic variable drops.
+		 */
+		if (lt_drop_detected == B_TRUE) {
+			if (lt_dtrace_deinit() != 0) {
+				no_dtrace_cleanup = B_FALSE;
+				retval = 1;
+				break;
+			}
+
+			lt_drop_detected = B_FALSE;
+			if (lt_dtrace_init() != 0) {
+				retval = 1;
+				break;
+			}
+		}
 	} while (running != 0);
 
 	lt_klog_write();
@@ -466,7 +495,9 @@
 	lt_display_deinit();
 
 end_ubreak:
-	lt_dtrace_deinit();
+	if (no_dtrace_cleanup == B_FALSE || lt_dtrace_deinit() != 0)
+		retval = 1;
+
 	lt_stat_free_all();
 
 end_nodtrace:
--- a/usr/src/cmd/latencytop/common/latencytop.h	Tue Oct 27 11:32:58 2009 +0800
+++ b/usr/src/cmd/latencytop/common/latencytop.h	Mon Oct 26 22:25:39 2009 -0700
@@ -169,13 +169,15 @@
 #define	CAUSE_FLAG_SPECIAL		4
 #define	CAUSE_ALL_FLAGS			0xffffffff
 
+extern boolean_t lt_drop_detected;
+
 /*
  * These functions collect statistics using DTrace.
  */
 extern int lt_dtrace_init(void);
 extern int lt_dtrace_work(int);
 extern int lt_dtrace_collect(void);
-extern void lt_dtrace_deinit(void);
+extern int lt_dtrace_deinit(void);
 
 /*
  * These functions maintain configuration, e.g. symbol to cause mapping.