changeset 10673:b22eb20aa9ca

PSARC/2009/339 LatencyTOP for OpenSolaris 6825817 Integrate latencyTOP into OpenSolaris Contributed by Lejun Zhu <lejun.zhu@intel.com>
author Krishnendu Sadhukhan - Sun Microsystems <Krishnendu.Sadhukhan@Sun.COM>
date Mon, 28 Sep 2009 13:53:34 -0700
parents cee5a0f557db
children bea34d7fb70a
files usr/src/Makefile.lint usr/src/cmd/Makefile usr/src/cmd/latencytop/Makefile usr/src/cmd/latencytop/Makefile.com usr/src/cmd/latencytop/amd64/Makefile usr/src/cmd/latencytop/common/display.c usr/src/cmd/latencytop/common/dwrapper.c usr/src/cmd/latencytop/common/klog.c usr/src/cmd/latencytop/common/latencytop.c usr/src/cmd/latencytop/common/latencytop.d usr/src/cmd/latencytop/common/latencytop.h usr/src/cmd/latencytop/common/latencytop.trans usr/src/cmd/latencytop/common/stat.c usr/src/cmd/latencytop/common/table.c usr/src/cmd/latencytop/common/util.c usr/src/cmd/latencytop/i386/Makefile usr/src/cmd/latencytop/sparcv9/Makefile usr/src/pkgdefs/Makefile usr/src/pkgdefs/SUNWlatencytop/Makefile usr/src/pkgdefs/SUNWlatencytop/depend usr/src/pkgdefs/SUNWlatencytop/pkginfo.tmpl usr/src/pkgdefs/SUNWlatencytop/prototype_com usr/src/pkgdefs/SUNWlatencytop/prototype_i386 usr/src/pkgdefs/SUNWlatencytop/prototype_sparc
diffstat 24 files changed, 5731 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/Makefile.lint	Mon Sep 28 14:54:22 2009 -0700
+++ b/usr/src/Makefile.lint	Mon Sep 28 13:53:34 2009 -0700
@@ -233,6 +233,7 @@
 	cmd/pools \
 	cmd/power \
 	cmd/powertop \
+	cmd/latencytop \
 	cmd/ppgsz \
 	cmd/praudit \
 	cmd/prctl \
--- a/usr/src/cmd/Makefile	Mon Sep 28 14:54:22 2009 -0700
+++ b/usr/src/cmd/Makefile	Mon Sep 28 13:53:34 2009 -0700
@@ -227,6 +227,7 @@
 	kstat		\
 	last		\
 	lastcomm	\
+	latencytop	\
 	ldap		\
 	ldapcachemgr	\
 	lgrpinfo	\
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/latencytop/Makefile	Mon Sep 28 13:53:34 2009 -0700
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2008-2009, Intel Corporation.
+# All Rights Reserved.
+#
+
+# Driver makefile: it compiles nothing itself and only recurses into the
+# per-ISA subdirectories named in SUBDIRS.
+PROG = latencytop
+
+include ../Makefile.cmd
+
+# NOTE(review): $(64ONLY) and $(BUILD64) are presumably prefixes supplied
+# by the included makefiles that either vanish or comment the line out,
+# selecting which ISA directories get built -- confirm in Makefile.cmd.
+$(64ONLY)SUBDIRS= $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+# Conditional macros: each top-level target records its own name in
+# TARGET so the recursive $(MAKE) below runs the same target in every
+# subdirectory.
+all	:=	TARGET = all
+install	:=	TARGET = install
+clean	:=	TARGET = clean
+clobber	:=	TARGET = clobber
+lint	:=	TARGET = lint
+
+.KEEP_STATE:
+
+all install clean clobber lint:	$(SUBDIRS)
+
+# FRC has neither prerequisites nor a recipe; depending on it makes the
+# subdirectory rule run on every invocation.
+$(SUBDIRS):	FRC
+	@cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../Makefile.targ
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/latencytop/Makefile.com	Mon Sep 28 13:53:34 2009 -0700
@@ -0,0 +1,79 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2008-2009, Intel Corporation.
+# All Rights Reserved.
+#
+
+PROG = latencytop
+OBJS = latencytop.o display.o dwrapper.o klog.o stat.o table.o util.o
+SRCS = $(OBJS:%.o=../common/%.c)
+
+include ../../Makefile.cmd
+
+CFLAGS += $(CCVERBOSE)
+CFLAGS64 += $(CCVERBOSE)
+
+# NOTE(review): the glib headers are picked up from absolute paths on the
+# build host -- confirm this is intended for every build environment.
+CPPFLAGS += -DEMBED_CONFIGS -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include
+C99MODE = $(C99_ENABLE)
+LDLIBS += -lcurses -ldtrace
+# glib is linked only for all/install; other targets (e.g. lint) do not
+# get it added to LDLIBS.
+all install	:= LDLIBS += -lglib-2.0
+
+LINTFLAGS += -erroff=E_NAME_USED_NOT_DEF2
+LINTFLAGS64 += -erroff=E_NAME_USED_NOT_DEF2
+
+FILEMODE = 0555
+
+# The D script and the translation table are wrapped into a relocatable
+# object and linked into the program. WRAPOPT is left for the per-ISA
+# makefile to set.
+ELFWRAP = elfwrap
+WRAPOBJ = latencytop_wrap.o
+
+CLEANFILES += $(OBJS) $(WRAPOBJ) ./latencytop_d ./latencytop_trans
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+install:        $(SUBDIRS)
+	-$(RM) $(ROOTPROG)
+	-$(LN) $(ISAEXEC) $(ROOTPROG)
+
+$(PROG): $(OBJS) $(WRAPOBJ)
+	$(LINK.c) -o $@ $(OBJS) $(WRAPOBJ) $(LDLIBS)
+	$(POST_PROCESS)
+
+$(WRAPOBJ): latencytop_d latencytop_trans
+	$(ELFWRAP) $(WRAPOPT) -o $@ latencytop_d latencytop_trans
+
+#
+# Local copies of the files to embed. Each copy lists its original as a
+# prerequisite so that editing ../common/latencytop.d or
+# ../common/latencytop.trans refreshes the copy and, through it,
+# $(WRAPOBJ) and $(PROG). Without the prerequisite a stale copy would be
+# considered up to date forever.
+# NOTE(review): the copies are presumably renamed with '_' because elfwrap
+# derives symbol names from the input file names -- confirm.
+#
+latencytop_d: ../common/latencytop.d
+	cp ../common/latencytop.d $@
+
+latencytop_trans: ../common/latencytop.trans
+	cp ../common/latencytop.trans $@
+
+clean:
+	$(RM) $(CLEANFILES)
+
+lint:	lint_SRCS
+
+%.o: ../common/%.c
+	$(COMPILE.c) $<
+
+include ../../Makefile.targ
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/latencytop/amd64/Makefile	Mon Sep 28 13:53:34 2009 -0700
@@ -0,0 +1,30 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2008-2009, Intel Corporation.
+# All Rights Reserved.
+#
+
+# 64-bit x86 build: the common rules first, then the 64-bit overrides.
+include ../Makefile.com
+include ../../Makefile.cmd.64
+
+# Passed to $(ELFWRAP) by the $(WRAPOBJ) rule in Makefile.com.
+WRAPOPT = -64
+
+install: all $(ROOTPROG64)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/latencytop/common/display.c	Mon Sep 28 13:53:34 2009 -0700
@@ -0,0 +1,1040 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2008-2009, Intel Corporation.
+ * All Rights Reserved.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <dirent.h>
+#include <curses.h>
+#include <time.h>
+#include <wchar.h>
+#include <ctype.h>
+#include <stdarg.h>
+#include <signal.h>
+
+#include "latencytop.h"
+
+/* Minimum terminal size (columns, rows) accepted by lt_display_init() */
+#define	LT_WINDOW_X			80
+#define	LT_WINDOW_Y			24
+
+/* Color pair numbers registered with init_pair() in lt_display_init() */
+#define	LT_COLOR_DEFAULT		1
+#define	LT_COLOR_HEADER			2
+
+/* Windows created by libcurses */
+static WINDOW	*titlebar = NULL;
+static WINDOW	*captionbar = NULL;
+static WINDOW	*sysglobal_window = NULL;
+static WINDOW	*taskbar = NULL;
+static WINDOW	*process_window = NULL;
+static WINDOW	*hintbar = NULL;
+/* Screen dimension; filled in by getmaxyx() in lt_display_init() */
+static int	screen_width = 1, screen_height = 1;
+/* Is display initialized, i.e. are window pointers set up. */
+static int	display_initialized = FALSE;
+/* Is initscr() called */
+static int	curses_inited = FALSE;
+
+/*
+ * State changed by user key presses. selected_pid and selected_tid
+ * remember the current selection between calls to lt_display_loop().
+ */
+static pid_t selected_pid = INVALID_PID;
+static id_t selected_tid = INVALID_TID;
+static lt_sort_t sort_type = LT_SORT_TOTAL;
+/* FALSE: lower pane shows a process; TRUE: it shows a single thread */
+static int thread_mode = FALSE;
+/* Type of list being displayed */
+static int current_list_type = LT_LIST_CAUSE;
+/* TRUE while the help screen replaces the normal panes */
+static int show_help = FALSE;
+
+/*
+ * Helpers that pad a string with blanks up to a given width, either
+ * after the text (right) or in front of it (left).
+ */
+#define	fill_space_right(a, b, c)	fill_space((a), (b), (c), TRUE)
+#define	fill_space_left(a, b, c)	fill_space((a), (b), (c), FALSE)
+
+/*
+ * Pad the NUL-terminated string in buffer with blanks until it is len
+ * characters long. len is first capped at buffer_limit - 1 so that the
+ * terminator always fits. A string that is already long enough is left
+ * untouched. is_right selects trailing (non-zero) or leading (zero)
+ * blanks.
+ */
+static void
+fill_space(char *buffer, int len, int buffer_limit, int is_right)
+{
+	int cur;
+	int pad;
+
+	if (len >= buffer_limit) {
+		len = buffer_limit - 1;
+	}
+
+	cur = strlen(buffer);
+	pad = len - cur;
+
+	if (pad <= 0) {
+		return;
+	}
+
+	if (!is_right) {
+		/* Shift text and terminator right, then fill the gap. */
+		(void) memmove(buffer + pad, buffer, cur + 1);
+		(void) memset(buffer, ' ', pad);
+		return;
+	}
+
+	(void) memset(buffer + cur, ' ', pad);
+	buffer[len] = '\0';
+}
+
+/*
+ * Format a duration given in nanoseconds into buffer (of size len) using
+ * the largest unit that keeps the number below 1000, then left-pad the
+ * result with blanks to fill_width. Returns buffer.
+ */
+static const char *
+get_time_string(double nanoseconds, char *buffer, int len, int fill_width)
+{
+	const double usec = 1000.0;
+	const double msec = 1000000.0;
+	const double sec  = 1000000000.0;
+	/* Upper bounds, lowered by half a unit of the next smaller scale. */
+	const double nsec_limit = usec - .5;
+	const double usec_limit = msec - .5 * usec;
+	const double msec_limit = sec - .5 * msec;
+	const double sec_limit = 999.5 * sec;
+
+	if (nanoseconds < nsec_limit) {
+		(void) snprintf(buffer, len, "%3.1f nsec", nanoseconds);
+	} else if (nanoseconds < usec_limit) {
+		(void) snprintf(buffer, len, "%3.1f usec",
+		    nanoseconds / usec);
+	} else if (nanoseconds < msec_limit) {
+		(void) snprintf(buffer, len, "%3.1f msec",
+		    nanoseconds / msec);
+	} else if (nanoseconds < sec_limit) {
+		(void) snprintf(buffer, len, "%3.1f  sec",
+		    nanoseconds / sec);
+	} else {
+		/* Too large for fixed notation; use an exponent. */
+		(void) snprintf(buffer, len, "%.0e sec",
+		    nanoseconds / sec);
+	}
+
+	fill_space_left(buffer, fill_width, len);
+	return (buffer);
+}
+
+/* Column layout used in print_statistics below */
+#define	WIDTH_REASON_STRING	36
+#define	WIDTH_COUNT		12
+#define	WIDTH_AVG		12
+#define	WIDTH_MAX		12
+#define	WIDTH_PCT		8
+#define	BEGIN_COUNT		WIDTH_REASON_STRING
+#define	BEGIN_AVG		(BEGIN_COUNT + WIDTH_COUNT)
+#define	BEGIN_MAX		(BEGIN_AVG + WIDTH_AVG)
+#define	BEGIN_PCT		(BEGIN_MAX + WIDTH_MAX)
+
+/*
+ * Print statistics in global/process pane. Called by print_sysglobal,
+ * print_process and print_thread.
+ *
+ * Parameters:
+ *		window - the global or process statistics window.
+ *		begin_line - where to start printing.
+ *		nlines - at most this many list entries are examined.
+ *		list - a stat_list.
+ *
+ * Entries whose count is zero are skipped without leaving a blank row.
+ * Nothing is printed when the display is not initialized or the grand
+ * total of the list is zero.
+ */
+static void
+print_statistics(WINDOW * window, int begin_line, int nlines, void *list)
+{
+	uint64_t total;
+	int row = begin_line;
+	int i;
+
+	if (!display_initialized) {
+		return;
+	}
+
+	total = lt_stat_list_get_gtotal(list);
+
+	if (total == 0) {
+		return;
+	}
+
+	/*
+	 * The index advances in the loop header so that skipping an entry
+	 * can never stall the loop; the output row advances only when a
+	 * line has actually been printed.
+	 */
+	for (i = 0; i < nlines && lt_stat_list_has_item(list, i); i++) {
+		char tmp[WIDTH_REASON_STRING];
+		const char *reason = lt_stat_list_get_reason(list, i);
+		uint64_t count = lt_stat_list_get_count(list, i);
+
+		/* Also protects the average below from dividing by zero. */
+		if (count == 0) {
+			continue;
+		}
+
+		(void) snprintf(tmp, sizeof (tmp), "%s", reason);
+		(void) mvwprintw(window, row, 0, "%s", tmp);
+
+		(void) snprintf(tmp, sizeof (tmp), "%llu",
+		    (unsigned long long)count);
+		fill_space_left(tmp, WIDTH_COUNT, sizeof (tmp));
+		(void) mvwprintw(window, row, BEGIN_COUNT, "%s", tmp);
+
+		(void) mvwprintw(window, row, BEGIN_AVG,
+		    "%s", get_time_string(
+		    (double)lt_stat_list_get_sum(list, i) / count,
+		    tmp, sizeof (tmp), WIDTH_AVG));
+
+		(void) mvwprintw(window, row, BEGIN_MAX,
+		    "%s", get_time_string(
+		    (double)lt_stat_list_get_max(list, i),
+		    tmp, sizeof (tmp), WIDTH_MAX));
+
+		/* The special-entry view shows no percentage. */
+		if (LT_LIST_SPECIALS != current_list_type) {
+			(void) snprintf(tmp, sizeof (tmp), "%.1f %%",
+			    (double)lt_stat_list_get_sum(list, i)
+			    / total * 100.0);
+		} else {
+			(void) snprintf(tmp, sizeof (tmp), "--- ");
+		}
+
+		fill_space_left(tmp, WIDTH_PCT, sizeof (tmp));
+
+		(void) mvwprintw(window, row, BEGIN_PCT, "%s", tmp);
+		row++;
+	}
+}
+
+/*
+ * Redraw the system-wide pane: a reverse-video header line followed by
+ * the statistics of the currently selected list type and sort order.
+ */
+static void
+print_sysglobal(void)
+{
+	const int rows = 10;	/* entries requested and printed */
+	char header[256];
+	void *stats;
+
+	if (!display_initialized) {
+		return;
+	}
+
+	(void) werase(sysglobal_window);
+
+	/* Header, padded with blanks to the full screen width */
+	(void) wattron(sysglobal_window, A_REVERSE);
+	(void) snprintf(header, sizeof (header),
+	    "%s", "System wide latencies");
+	fill_space_right(header, screen_width, sizeof (header));
+	(void) mvwprintw(sysglobal_window, 0, 0, "%s", header);
+	(void) wattroff(sysglobal_window, A_REVERSE);
+
+	stats = lt_stat_list_create(current_list_type,
+	    LT_LEVEL_GLOBAL, 0, 0, rows, sort_type);
+	print_statistics(sysglobal_window, 1, rows, stats);
+	lt_stat_list_free(stats);
+
+	(void) wrefresh(sysglobal_window);
+}
+
+/*
+ * Show the current view in the header line of the process pane as
+ * "View: <n><m>", where <n> is '1', '2' or '3' for the list type ('?' if
+ * unknown) and <m> is 'P' in process mode or 'T' in thread mode.
+ */
+static void
+print_current_mode()
+{
+	char view;
+	char level;
+
+	if (!display_initialized) {
+		return;
+	}
+
+	if (current_list_type == LT_LIST_CAUSE) {
+		view = '1';
+	} else if (current_list_type == LT_LIST_SPECIALS) {
+		view = '2';
+	} else if (current_list_type == LT_LIST_SOBJ) {
+		view = '3';
+	} else {
+		view = '?';
+	}
+
+	level = thread_mode ? 'T' : 'P';
+
+	(void) mvwprintw(process_window, 0, screen_width - 8, "View: %c%c",
+	    view, level);
+}
+
+/*
+ * Redraw the process pane for one process: a reverse-video header with
+ * the process name, PID and thread count (plus the total latency unless
+ * the special-entry list is shown), followed by the statistics.
+ * Used when the display is in process mode.
+ */
+static void
+print_process(unsigned int pid)
+{
+	const int rows = 8;	/* entries requested and printed */
+	char header[256];
+	char total[30];
+	void *stats;
+
+	if (!display_initialized) {
+		return;
+	}
+
+	stats = lt_stat_list_create(current_list_type, LT_LEVEL_PROCESS,
+	    pid, 0, rows, sort_type);
+
+	(void) werase(process_window);
+	(void) wattron(process_window, A_REVERSE);
+
+	(void) snprintf(header, sizeof (header), "Process %s (%i), %d threads",
+	    lt_stat_proc_get_name(pid), pid, lt_stat_proc_get_nthreads(pid));
+	fill_space_right(header, screen_width, sizeof (header));
+	(void) mvwprintw(process_window, 0, 0, "%s", header);
+
+	if (current_list_type != LT_LIST_SPECIALS) {
+		const char *text = get_time_string(
+		    (double)lt_stat_list_get_gtotal(stats),
+		    total, sizeof (total), 12);
+
+		(void) mvwprintw(process_window, 0, 48, "Total: %s", text);
+	}
+
+	print_current_mode();
+	(void) wattroff(process_window, A_REVERSE);
+
+	print_statistics(process_window, 1, rows, stats);
+	lt_stat_list_free(stats);
+
+	(void) wrefresh(process_window);
+}
+
+/*
+ * Draw the task bar in process mode: one fixed-width cell per tracked
+ * process, showing the page of cells that contains pidlist_index, with
+ * that cell highlighted. "<-" and "->" mark further entries to the left
+ * and right.
+ */
+static void
+print_taskbar_process(pid_t *pidlist, int pidlist_len, int pidlist_index)
+{
+	const int ITEM_WIDTH = 8;
+
+	int per_page;
+	int idx;
+	int col;
+
+	if (!display_initialized) {
+		return;
+	}
+
+	/* First entry of the page that holds the selected one */
+	per_page = (screen_width / ITEM_WIDTH) - 1;
+	idx = (pidlist_index / per_page) * per_page;
+
+	(void) werase(taskbar);
+
+	if (idx != 0) {
+		(void) mvwprintw(taskbar, 0, 0, "<-");
+	}
+
+	for (col = ITEM_WIDTH / 2;
+	    col + ITEM_WIDTH <= screen_width && idx < pidlist_len;
+	    col += ITEM_WIDTH, idx++) {
+		char str[ITEM_WIDTH+1];
+		const char *pname = lt_stat_proc_get_name(pidlist[idx]);
+		int selected = (idx == pidlist_index);
+		int slen;
+
+		/* Fall back to "<pid>" when no name is available. */
+		if (pname == NULL || pname[0] == '\0') {
+			(void) snprintf(str, sizeof (str) - 1,
+			    "<%d>", pidlist[idx]);
+		} else {
+			(void) snprintf(str, sizeof (str) - 1, "%s", pname);
+		}
+
+		/* Blank-pad the cell to its full width. */
+		slen = strlen(str);
+
+		if (slen < ITEM_WIDTH) {
+			(void) memset(&str[slen], ' ', ITEM_WIDTH - slen);
+		}
+
+		str[sizeof (str) - 1] = '\0';
+
+		if (selected) {
+			(void) wattron(taskbar, A_REVERSE);
+		}
+
+		(void) mvwprintw(taskbar, 0, col, "%s", str);
+
+		if (selected) {
+			(void) wattroff(taskbar, A_REVERSE);
+		}
+	}
+
+	if (idx != pidlist_len) {
+		(void) mvwprintw(taskbar, 0, screen_width - 2, "->");
+	}
+
+	(void) wrefresh(taskbar);
+}
+
+/*
+ * Draw the task bar in thread mode: one fixed-width cell per tracked
+ * thread, labelled "<process name>_<thread id>", showing the page of
+ * cells that contains list_index, with that cell highlighted. "<-" and
+ * "->" mark further entries to the left and right.
+ */
+static void
+print_taskbar_thread(pid_t *pidlist, id_t *tidlist, int list_len,
+    int list_index)
+{
+	const int ITEM_WIDTH = 12;
+
+	const char *pname = NULL;
+	pid_t cached_pid = INVALID_PID;
+	int per_page;
+	int idx;
+	int col;
+
+	if (!display_initialized) {
+		return;
+	}
+
+	/* First entry of the page that holds the selected one */
+	per_page = (screen_width - 8) / ITEM_WIDTH;
+	idx = (list_index / per_page) * per_page;
+
+	(void) werase(taskbar);
+
+	if (idx != 0) {
+		(void) mvwprintw(taskbar, 0, 0, "<-");
+	}
+
+	for (col = 4;
+	    col + ITEM_WIDTH <= screen_width && idx < list_len;
+	    col += ITEM_WIDTH, idx++) {
+		char str[ITEM_WIDTH+1];
+		int selected = (idx == list_index);
+		int slen, tlen;
+
+		/* Look the name up again only when the PID changes. */
+		if (pidlist[idx] != cached_pid) {
+			pname = lt_stat_proc_get_name(pidlist[idx]);
+			cached_pid = pidlist[idx];
+		}
+
+		/*
+		 * Measure the "_<tid>" suffix first, then truncate the
+		 * process name so that the suffix still fits in the cell.
+		 */
+		tlen = snprintf(NULL, 0, "_%d", tidlist[idx]);
+
+		if (pname == NULL || pname[0] == '\0') {
+			(void) snprintf(str, sizeof (str) - tlen - 1,
+			    "<%d>", pidlist[idx]);
+		} else {
+			(void) snprintf(str, sizeof (str) - tlen - 1,
+			    "%s", pname);
+		}
+
+		slen = strlen(str);
+
+		(void) snprintf(&str[slen], sizeof (str) - slen,
+		    "_%d", tidlist[idx]);
+
+		slen += tlen;
+
+		/* Blank-pad the cell to its full width. */
+		if (slen < ITEM_WIDTH) {
+			(void) memset(&str[slen], ' ', ITEM_WIDTH - slen);
+		}
+
+		str[sizeof (str) - 1] = '\0';
+
+		if (selected) {
+			(void) wattron(taskbar, A_REVERSE);
+		}
+
+		(void) mvwprintw(taskbar, 0, col, "%s", str);
+
+		if (selected) {
+			(void) wattroff(taskbar, A_REVERSE);
+		}
+	}
+
+	if (idx != list_len) {
+		(void) mvwprintw(taskbar, 0, screen_width - 2, "->");
+	}
+
+	(void) wrefresh(taskbar);
+}
+
+/*
+ * Redraw the process pane for one thread: a reverse-video header with
+ * the process name, PID and LWP id (plus the total latency unless the
+ * special-entry list is shown), followed by the statistics.
+ * Used when the display is in thread mode.
+ */
+static void
+print_thread(pid_t pid, id_t tid)
+{
+	const int rows = 8;	/* entries requested and printed */
+	char header[256];
+	char total[30];
+	void *stats;
+
+	if (!display_initialized) {
+		return;
+	}
+
+	stats = lt_stat_list_create(current_list_type, LT_LEVEL_THREAD,
+	    pid, tid, rows, sort_type);
+
+	(void) werase(process_window);
+	(void) wattron(process_window, A_REVERSE);
+
+	(void) snprintf(header, sizeof (header), "Process %s (%i), LWP %d",
+	    lt_stat_proc_get_name(pid), pid, tid);
+	fill_space_right(header, screen_width, sizeof (header));
+	(void) mvwprintw(process_window, 0, 0, "%s", header);
+
+	if (current_list_type != LT_LIST_SPECIALS) {
+		const char *text = get_time_string(
+		    (double)lt_stat_list_get_gtotal(stats),
+		    total, sizeof (total), 12);
+
+		(void) mvwprintw(process_window, 0, 48, "Total: %s", text);
+	}
+
+	print_current_mode();
+	(void) wattroff(process_window, A_REVERSE);
+
+	print_statistics(process_window, 1, rows, stats);
+	lt_stat_list_free(stats);
+
+	(void) wrefresh(process_window);
+}
+
+/*
+ * Update hint string at the bottom line. The message to print is stored in
+ * hint. If hint is NULL, the function cycles through its own list of
+ * hints, showing a new one once the previous message has been on screen
+ * for update_interval milliseconds; until then it returns without
+ * drawing. An explicit hint is shown immediately and holds the line for
+ * two intervals.
+ *
+ * The message is centered. A message wider than the screen starts at
+ * column 0 and is cut off at the screen width.
+ */
+static void
+print_hint(const char *hint)
+{
+	/* One entry per string: every element needs its own comma. */
+	const char *HINTS[] = {
+		"Press '<' or '>' to switch between processes.",
+		"Press 'q' to exit.",
+		"Press 'r' to refresh immediately.",
+		"Press 't' to toggle Process/Thread display mode.",
+		"Press 'h' for help.",
+		"Use 'c', 'a', 'm', 'p' to change sort criteria.",
+		"Use '1', '2', '3' to switch between windows."
+	};
+	const uint64_t update_interval = 5000; /* 5 seconds */
+
+	static int index = 0;
+	static uint64_t next_hint = 0;
+	uint64_t now = lt_millisecond();
+	int xpos;
+
+	if (!display_initialized) {
+		return;
+	}
+
+	if (hint == NULL) {
+		if (now < next_hint) {
+			return;
+		}
+
+		hint = HINTS[index];
+		index = (index + 1) % (sizeof (HINTS) / sizeof (HINTS[0]));
+		next_hint = now + update_interval;
+	} else {
+		/*
+		 * Important messages stay on display for at least 2 cycles.
+		 */
+		next_hint = now + update_interval * 2;
+	}
+
+	/*
+	 * Center in signed arithmetic: strlen() returns an unsigned type,
+	 * so subtracting it directly would wrap around to a huge column
+	 * for a message wider than the screen.
+	 */
+	xpos = (screen_width - (int)strlen(hint)) / 2;
+
+	if (xpos < 0) {
+		xpos = 0;
+	}
+
+	(void) werase(hintbar);
+	(void) mvwprintw(hintbar, 0, xpos, "%.*s", screen_width, hint);
+	(void) wrefresh(hintbar);
+}
+
+/*
+ * Create a PID list or a PID/TID list (if operation mode is thread) from
+ * available statistics, and locate the previous selection in it.
+ *
+ * On entry *plist and *tlist must each be NULL or a list returned by an
+ * earlier call; such lists are released here before new ones are
+ * created, so repeated calls neither leak nor leave a list behind that
+ * belongs to the other mode. In process mode *tlist is NULL on return.
+ *
+ * On return *list_len is the number of entries and *list_index the entry
+ * to select:
+ *	process mode - the entry of selected_pid;
+ *	thread mode  - the entry of selected_pid/selected_tid, or failing
+ *		       that the first thread of selected_pid;
+ *	otherwise    - 0.
+ *
+ * NOTE(review): assumes lt_stat_proc_list_create() allocates fresh lists
+ * that are released with lt_stat_proc_list_free() -- confirm in stat.c.
+ */
+static void
+get_plist(pid_t **plist, id_t **tlist, int *list_len, int *list_index)
+{
+	int n;
+	int i;
+
+	if (*plist != NULL || *tlist != NULL) {
+		lt_stat_proc_list_free(*plist, *tlist);
+		*plist = NULL;
+		*tlist = NULL;
+	}
+
+	if (!thread_mode) {
+		/* Per-process mode */
+		n = lt_stat_proc_list_create(plist, NULL);
+
+		/* Search for previously selected PID */
+		for (i = 0; i < n && (*plist)[i] != selected_pid; ++i) {
+			continue;
+		}
+	} else {
+		/* Per-thread mode */
+		n = lt_stat_proc_list_create(plist, tlist);
+
+		/* Search for previously selected PID & TID */
+		for (i = 0; i < n; ++i) {
+			if ((*plist)[i] == selected_pid &&
+			    (*tlist)[i] == selected_tid) {
+				break;
+			}
+		}
+
+		if (i >= n) {
+			/*
+			 * The previously selected thread is gone.
+			 * Select the first thread of the same process.
+			 */
+			for (i = 0; i < n && (*plist)[i] != selected_pid;
+			    ++i) {
+				continue;
+			}
+		}
+	}
+
+	if (i >= n) {
+		/*
+		 * The previously selected process is gone as well.
+		 * Select the first entry.
+		 */
+		i = 0;
+	}
+
+	*list_len = n;
+	*list_index = i;
+}
+
+/*
+ * Write the help text, one entry per screen line starting at the top of
+ * stdscr. Shown when the user presses the 'h' hot key.
+ */
+static void
+print_help(void)
+{
+	const char *HELP[] = {
+		TITLE,
+		COPYRIGHT,
+		"",
+		"These single-character commands are available:",
+		"<       - Move to previous process/thread.",
+		">       - Move to next process/thread.",
+		"q       - Exit.",
+		"r       - Refresh.",
+		"t       - Toggle process/thread mode.",
+		"c       - Sort by count.",
+		"a       - Sort by average.",
+		"m       - Sort by maximum.",
+		"p       - Sort by percent.",
+		"1       - Show list by causes.",
+		"2       - Show list of special entries.",
+		"3       - Show list by synchronization objects.",
+		"h       - Show this help.",
+		"",
+		"Press any key to continue..."
+	};
+	const int nlines = sizeof (HELP) / sizeof (HELP[0]);
+	int row;
+
+	if (!display_initialized) {
+		return;
+	}
+
+	for (row = 0; row < nlines; row++) {
+		(void) mvwprintw(stdscr, row, 0, "%s", HELP[row]);
+	}
+
+	(void) refresh();
+}
+
+/*
+ * Draw the static parts of the screen: the centered title in the title
+ * bar and the column captions. Also gives the hint bar the header color
+ * (the hint bar itself is drawn by print_hint).
+ */
+static void
+print_title(void)
+{
+	int title_x;
+
+	if (!display_initialized) {
+		return;
+	}
+
+	/* Title bar */
+	(void) wattrset(titlebar, COLOR_PAIR(LT_COLOR_HEADER));
+	(void) wbkgd(titlebar, COLOR_PAIR(LT_COLOR_HEADER));
+	(void) werase(titlebar);
+
+	title_x = (screen_width - strlen(TITLE)) / 2;
+	(void) mvwprintw(titlebar, 0, title_x, "%s", TITLE);
+	(void) wrefresh(titlebar);
+
+	/* Column captions */
+	(void) werase(captionbar);
+	(void) mvwprintw(captionbar, 0, 0, "%s",
+	    "               Cause                    "
+	    "Count      Average     Maximum   Percent");
+	(void) wrefresh(captionbar);
+
+	/* Hint bar colors */
+	(void) wattrset(hintbar, COLOR_PAIR(LT_COLOR_HEADER));
+	(void) wbkgd(hintbar, COLOR_PAIR(LT_COLOR_HEADER));
+}
+
+/*
+ * Handle signal from terminal resize (installed for SIGWINCH by
+ * lt_display_init). Passes "r" to lt_gpipe_break(); lt_display_loop
+ * treats an 'r' read from the gpipe like the 'r' key and re-initializes
+ * the display.
+ * NOTE(review): presumably lt_gpipe_break() writes the string to the pipe
+ * whose read end is lt_gpipe_readfd() -- confirm.
+ */
+/* ARGSUSED */
+static void
+on_resize(int sig)
+{
+	lt_gpipe_break("r");
+}
+
+/*
+ * Initialize display. Display will be cleared when this function returns.
+ *
+ * Does nothing if the display is already initialized. If the terminal is
+ * smaller than LT_WINDOW_X x LT_WINDOW_Y, curses is started and a notice
+ * is shown, but no panes are created and display_initialized stays FALSE,
+ * which turns the print_* functions into no-ops.
+ */
+void
+lt_display_init(void)
+{
+	if (display_initialized) {
+		return;
+	}
+
+	/* Window resize signal */
+	(void) signal(SIGWINCH, on_resize);
+
+	/* Initialize curses library */
+	(void) initscr();
+	(void) start_color();
+	(void) keypad(stdscr, TRUE);
+	(void) nonl();
+	(void) cbreak();
+	(void) noecho();
+	(void) curs_set(0);
+
+	/* Set up color pairs */
+	(void) init_pair(LT_COLOR_DEFAULT, COLOR_WHITE, COLOR_BLACK);
+	(void) init_pair(LT_COLOR_HEADER, COLOR_BLACK, COLOR_WHITE);
+
+	/* From here on lt_display_deinit() has to call endwin(). */
+	curses_inited = TRUE;
+	getmaxyx(stdscr, screen_height, screen_width);
+
+	if (screen_width < LT_WINDOW_X || screen_height < LT_WINDOW_Y) {
+		(void) mvwprintw(stdscr, 0, 0, "Terminal size is too small.");
+		(void) mvwprintw(stdscr, 1, 0,
+		    "Please resize it to 80x24 or larger.");
+		(void) mvwprintw(stdscr, 2, 0, "Press q to quit.");
+		(void) refresh();
+		return;
+	}
+
+	/*
+	 * Set up all window panes. From top to bottom: title (row 0),
+	 * captions (row 1), system-wide statistics (from row 2), process
+	 * statistics (from row screen_height / 2 + 1), task bar (second
+	 * to last row) and hint bar (last row).
+	 */
+	titlebar = subwin(stdscr, 1, screen_width, 0, 0);
+	captionbar = subwin(stdscr, 1, screen_width, 1, 0);
+	sysglobal_window = subwin(stdscr, screen_height / 2 - 1,
+	    screen_width, 2, 0);
+	process_window = subwin(stdscr, screen_height / 2 - 3,
+	    screen_width, screen_height / 2 + 1, 0);
+	taskbar = subwin(stdscr, 1, screen_width, screen_height - 2, 0);
+	hintbar = subwin(stdscr, 1, screen_width, screen_height - 1, 0);
+	(void) werase(stdscr);
+	(void) refresh();
+
+	display_initialized = TRUE;
+
+	print_title();
+}
+
+/*
+ * The event loop for display. It displays data on screen and handles hotkey
+ * presses.
+ *
+ * Parameter :
+ *		duration - returns after 'duration' (in milliseconds)
+ *
+ * The function also returns if user presses 'q', 'Ctrl+C' or 'r'.
+ *
+ * Return value:
+ *		0 - main() exits
+ *		1 - main() calls it again
+ *
+ * The process/thread selection is remembered in selected_pid and
+ * selected_tid on return, so that the next call can restore it. The
+ * lists may be empty; list_index is therefore range-checked before it is
+ * used as a subscript.
+ */
+int
+lt_display_loop(int duration)
+{
+	uint64_t start;
+	int remaining;
+	struct timeval timeout;
+	fd_set read_fd;
+	int need_refresh = TRUE;
+	pid_t *plist = NULL;
+	id_t *tlist = NULL;
+	int list_len = 0;
+	int list_index = 0;
+	int retval = 1;
+	int next_snap;
+	int gpipe;
+
+	start = lt_millisecond();
+	gpipe = lt_gpipe_readfd();
+
+	if (!show_help) {
+		print_hint(NULL);
+		print_sysglobal();
+	}
+
+	get_plist(&plist, &tlist, &list_len, &list_index);
+
+	for (;;) {
+		if (list_len > 0 && need_refresh && !show_help) {
+			if (!thread_mode) {
+				print_taskbar_process(plist, list_len,
+				    list_index);
+				print_process(plist[list_index]);
+			} else {
+				print_taskbar_thread(plist, tlist,
+				    list_len, list_index);
+				print_thread(plist[list_index],
+				    tlist[list_index]);
+			}
+		}
+
+		need_refresh = TRUE;	/* Usually we need refresh. */
+		remaining = duration - (int)(lt_millisecond() - start);
+
+		if (remaining <= 0) {
+			break;
+		}
+
+		/* Embedded dtrace snap action here. */
+		next_snap = lt_dtrace_work(0);
+
+		if (next_snap == 0) {
+			/*
+			 * Just did a snap, check time for the next one.
+			 */
+			next_snap = lt_dtrace_work(0);
+		}
+
+		/* Do not sleep past the next snapshot. */
+		if (next_snap > 0 && remaining > next_snap) {
+			remaining = next_snap;
+		}
+
+		timeout.tv_sec = remaining / 1000;
+		timeout.tv_usec = (remaining % 1000) * 1000;
+
+		FD_ZERO(&read_fd);
+		FD_SET(0, &read_fd);
+		FD_SET(gpipe, &read_fd);
+
+		/* Wait for keyboard input, or signal from gpipe */
+		if (select(gpipe + 1, &read_fd, NULL, NULL, &timeout) > 0) {
+			int k = 0;
+
+			if (FD_ISSET(gpipe, &read_fd)) {
+				/*
+				 * Data from pipe has priority. ch starts
+				 * out as 0 so that a failed read is
+				 * handled as "no key" instead of as
+				 * whatever happened to be on the stack.
+				 */
+				char ch = '\0';
+				(void) read(gpipe, &ch, 1);
+				k = ch; /* Need this for big-endianness */
+			} else {
+				k = getch();
+			}
+
+			/*
+			 * Check if we need to update the hint line whenever we
+			 * get a chance.
+			 * NOTE: current implementation depends on
+			 * g_config.lt_cfg_snap_interval, but it's OK because it
+			 * doesn't have to be precise.
+			 */
+			print_hint(NULL);
+			/*
+			 * If help is on display right now, and a key press
+			 * happens, we need to clear the help and continue.
+			 */
+			if (show_help) {
+				(void) werase(stdscr);
+				(void) refresh();
+				print_title();
+				print_sysglobal();
+				show_help = FALSE;
+				/* Drop this key and continue */
+				continue;
+			}
+
+			switch (k) {
+			case 'Q':
+			case 'q':
+				retval = 0;
+				goto quit;
+			case 'R':
+			case 'r':
+				lt_display_deinit();
+				lt_display_init();
+				goto quit;
+			case 'H':
+			case 'h':
+				show_help = TRUE;
+				(void) werase(stdscr);
+				(void) refresh();
+				print_help();
+				break;
+			case ',':
+			case '<':
+			case KEY_LEFT:
+				--list_index;
+
+				if (list_index < 0) {
+					list_index = 0;
+				}
+
+				break;
+			case '.':
+			case '>':
+			case KEY_RIGHT:
+				++list_index;
+
+				if (list_index >= list_len) {
+					/* Stay at 0 when the list is empty. */
+					list_index =
+					    (list_len > 0) ? list_len - 1 : 0;
+				}
+
+				break;
+			case 'a':
+			case 'A':
+				sort_type = LT_SORT_AVG;
+				print_sysglobal();
+				break;
+			case 'p':
+			case 'P':
+				sort_type = LT_SORT_TOTAL;
+				print_sysglobal();
+				break;
+			case 'm':
+			case 'M':
+				sort_type = LT_SORT_MAX;
+				print_sysglobal();
+				break;
+			case 'c':
+			case 'C':
+				sort_type = LT_SORT_COUNT;
+				print_sysglobal();
+				break;
+			case 't':
+			case 'T':
+				/* Keep the process selected across modes. */
+				if (plist != NULL && list_index < list_len) {
+					selected_pid = plist[list_index];
+				}
+
+				selected_tid = INVALID_TID;
+				thread_mode = !thread_mode;
+				get_plist(&plist, &tlist,
+				    &list_len, &list_index);
+				break;
+			case '1':
+			case '!':
+				current_list_type = LT_LIST_CAUSE;
+				print_sysglobal();
+				break;
+			case '2':
+			case '@':
+				if (g_config.lt_cfg_low_overhead_mode) {
+					lt_display_error("Switching mode is "
+					    "not available for '-f low'.");
+				} else {
+					current_list_type = LT_LIST_SPECIALS;
+					print_sysglobal();
+				}
+
+				break;
+			case '3':
+			case '#':
+				if (g_config.lt_cfg_trace_syncobj) {
+					current_list_type = LT_LIST_SOBJ;
+					print_sysglobal();
+				} else if (g_config.lt_cfg_low_overhead_mode) {
+					lt_display_error("Switching mode is "
+					    "not available for '-f low'.");
+				} else {
+					lt_display_error("Tracing "
+					    "synchronization objects is "
+					    "disabled.");
+				}
+
+				break;
+			default:
+				/* Wake up for nothing; no refresh is needed */
+				need_refresh = FALSE;
+				break;
+			}
+		} else {
+			need_refresh = FALSE;
+		}
+	}
+
+quit:
+	/*
+	 * Remember the selection for the next call, but only when
+	 * list_index names an existing entry. The thread list is
+	 * consulted in thread mode only: in process mode list_index
+	 * refers to the process list.
+	 */
+	if (list_index >= 0 && list_index < list_len) {
+		if (plist != NULL) {
+			selected_pid = plist[list_index];
+		}
+
+		if (thread_mode && tlist != NULL) {
+			selected_tid = tlist[list_index];
+		}
+	}
+
+	lt_stat_proc_list_free(plist, tlist);
+
+	return (retval);
+}
+
+/*
+ * Tear the display down: leave curses mode if it was entered, then put
+ * every display variable back to its initial value so that
+ * lt_display_init() can start over.
+ */
+void
+lt_display_deinit(void)
+{
+	if (curses_inited) {
+		(void) clear();
+		(void) refresh();
+		(void) endwin();
+	}
+
+	/* Window handles */
+	titlebar = captionbar = NULL;
+	sysglobal_window = process_window = NULL;
+	taskbar = hintbar = NULL;
+
+	/* Screen size */
+	screen_width = screen_height = 1;
+
+	/* State flags */
+	display_initialized = FALSE;
+	curses_inited = FALSE;
+}
+
+/*
+ * Report an error message given in printf style. Trailing newlines and
+ * carriage returns are removed. While the display is not initialized the
+ * message goes to stderr; otherwise it is shown in the hint bar, unless
+ * the help screen is up, in which case it is dropped.
+ */
+/* ARGSUSED */
+void
+lt_display_error(const char *fmt, ...)
+{
+	char msg[81];
+	va_list ap;
+	char *end;
+
+	va_start(ap, fmt);
+	(void) vsnprintf(msg, sizeof (msg), fmt, ap);
+	va_end(ap);
+
+	/* Walk back from the end, cutting off line terminators. */
+	for (end = msg + strlen(msg);
+	    end > msg && (end[-1] == '\n' || end[-1] == '\r'); --end) {
+		end[-1] = '\0';
+	}
+
+	if (!display_initialized) {
+		(void) fprintf(stderr, "%s\n", msg);
+		return;
+	}
+
+	if (!show_help) {
+		print_hint(msg);
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/latencytop/common/dwrapper.c	Mon Sep 28 13:53:34 2009 -0700
@@ -0,0 +1,573 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2008-2009, Intel Corporation.
+ * All Rights Reserved.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <dtrace.h>
+#include <string.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <limits.h>
+
+#include "latencytop.h"
+
+static dtrace_hdl_t *g_dtp = NULL;	/* dtrace handle */
+static pid_t pid_self = -1;		/* PID of our own process */
+
+/*
+ * Ignore sched if sched is not tracked.
+ * Also ignore ourselves (i.e., latencytop).
+ */
+#define	SHOULD_IGNORE(pid)		\
+	((!g_config.lt_cfg_trace_sched && 0 == (pid)) || pid_self == (pid))
+
+/*
+ * Read an unsigned integer of b bytes stored at address a in a dtrace
+ * record and widen it to 64 bits. Only widths of 8, 4, 2 and 1 bytes are
+ * understood; any other width yields 0.
+ */
+static uint64_t
+rec_get_value(void *a, size_t b)
+{
+	if (b == sizeof (uint64_t)) {
+		return (*(uint64_t *)a);
+	}
+
+	if (b == sizeof (uint32_t)) {
+		return (*(uint32_t *)a);
+	}
+
+	if (b == sizeof (uint16_t)) {
+		return (*(uint16_t *)a);
+	}
+
+	if (b == sizeof (uint8_t)) {
+		return (*(uint8_t *)a);
+	}
+
+	/* Unsupported width */
+	return (0);
+}
+
+/*
+ * Callback to process aggregation lt_call_* (related to on/off cpu
+ * activities) in the snapshot.
+ *
+ * The aggregation is expected to carry, in this order, the records pid,
+ * tid, kernel stack, tag string, priority and the aggregated value (see the
+ * REC_* indices below). The stack is rendered as a space-separated list of
+ * "object`symbol" names and handed to lt_stat_update() together with the
+ * other fields.
+ *
+ * Returns 0 when the record was consumed or deliberately ignored, and -1
+ * when the record layout is not the expected one.
+ */
+static int
+aggwalk_call(const dtrace_aggdata_t *data, lt_stat_type_t stat_type)
+{
+	dtrace_aggdesc_t *aggdesc = data->dtada_desc;
+	dtrace_syminfo_t dts;
+	GElf_Sym sym;
+	caddr_t addr;
+	pid_t pid;
+	id_t tid;
+	unsigned int stack_depth;
+	unsigned int pc_size;
+	uint64_t pc;
+	uint64_t agg_value;
+	char *ptr = NULL;
+	char *buffer = NULL;
+	int ptrsize;
+	unsigned int buffersize;
+	char *tag = NULL;
+	unsigned int priority;
+	enum { REC_PID = 1, REC_TID, REC_STACK, REC_TAG, REC_PRIO, REC_AGG,
+	    NREC };
+
+	/* Check action type */
+	if ((aggdesc->dtagd_nrecs < NREC) ||
+	    (aggdesc->dtagd_rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR) ||
+	    (aggdesc->dtagd_rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR) ||
+	    (aggdesc->dtagd_rec[REC_TAG].dtrd_action != DTRACEACT_DIFEXPR) ||
+	    (aggdesc->dtagd_rec[REC_PRIO].dtrd_action != DTRACEACT_DIFEXPR) ||
+	    (!DTRACEACT_ISAGG(aggdesc->dtagd_rec[REC_AGG].dtrd_action)) ||
+	    (aggdesc->dtagd_rec[REC_STACK].dtrd_action != DTRACEACT_STACK)) {
+
+		return (-1);
+	}
+
+	pid = rec_get_value(
+	    data->dtada_data + aggdesc->dtagd_rec[REC_PID].dtrd_offset,
+	    aggdesc->dtagd_rec[REC_PID].dtrd_size);
+
+	if (SHOULD_IGNORE(pid)) {
+		return (0);
+	}
+
+	tid = rec_get_value(
+	    data->dtada_data + aggdesc->dtagd_rec[REC_TID].dtrd_offset,
+	    aggdesc->dtagd_rec[REC_TID].dtrd_size);
+
+	/* Parse stack array from dtagd_rec */
+	stack_depth = aggdesc->dtagd_rec[REC_STACK].dtrd_arg;
+
+	if (stack_depth == 0) {
+		/* Malformed record; also avoids dividing by zero below. */
+		return (-1);
+	}
+
+	pc_size = aggdesc->dtagd_rec[REC_STACK].dtrd_size / stack_depth;
+	addr = data->dtada_data + aggdesc->dtagd_rec[REC_STACK].dtrd_offset;
+	buffersize = (stack_depth * (2 * PATH_MAX + 2) + 1) * sizeof (char);
+	buffer = (char *)lt_malloc(buffersize);
+	/* Start with an empty string in case no frame can be resolved. */
+	buffer[0] = '\0';
+	ptr = buffer;
+	ptrsize = buffersize;
+
+	/*
+	 * Print the stack. Stop at the first zero pc or after stack_depth
+	 * frames, whichever comes first, so that a completely filled stack
+	 * is never read past the end of its record.
+	 */
+	for (; stack_depth > 0; stack_depth--) {
+		pc = rec_get_value(addr, pc_size);
+
+		if (pc == 0) {
+			break;
+		}
+
+		addr += pc_size;
+
+		if (dtrace_lookup_by_addr(g_dtp, pc, &sym, &dts) == 0) {
+			int len;
+			len = snprintf(ptr, ptrsize,
+			    "%s`%s ", dts.dts_object, dts.dts_name);
+			ptrsize -= len;
+
+			if (ptrsize <= 0) {
+				/*
+				 * snprintf returns "desired" length, so
+				 * reaching here means our buffer is full.
+				 * Move ptr to the last byte of the buffer and
+				 * break.
+				 */
+				ptr = &buffer[buffersize-1];
+				break;
+			} else {
+				ptr += len;
+			}
+		}
+	}
+
+	if (ptr != buffer) {
+		/*
+		 * We have printed something, so it is safe to remove
+		 * the last ' '.
+		 */
+		*(ptr-1) = '\0';
+	}
+
+	tag = (char *)data->dtada_data +
+	    aggdesc->dtagd_rec[REC_TAG].dtrd_offset;
+
+	priority = rec_get_value(
+	    data->dtada_data + aggdesc->dtagd_rec[REC_PRIO].dtrd_offset,
+	    aggdesc->dtagd_rec[REC_PRIO].dtrd_size);
+
+	agg_value = rec_get_value(
+	    data->dtada_data + aggdesc->dtagd_rec[REC_AGG].dtrd_offset,
+	    aggdesc->dtagd_rec[REC_AGG].dtrd_size);
+
+	lt_stat_update(pid, tid, buffer, tag, priority, stat_type, agg_value);
+
+	free(buffer);
+
+	return (0);
+}
+
+/*
+ * Callback to process aggregation lt_named_* (related to lock spinning etc.),
+ * in the snapshot.
+ *
+ * Expected records, in order: pid, tid, cause name string, aggregated value.
+ * Returns -1 if the layout does not match, 0 otherwise.
+ */
+static int
+aggwalk_named(const dtrace_aggdata_t *data, lt_stat_type_t stat_type)
+{
+	enum { REC_PID = 1, REC_TID, REC_TYPE, REC_AGG, NREC };
+	const dtrace_aggdesc_t *desc = data->dtada_desc;
+	const dtrace_recdesc_t *rec = desc->dtagd_rec;
+	caddr_t base = data->dtada_data;
+	uint64_t agg_value;
+	char *type;
+	int cause_id;
+	pid_t pid;
+	id_t tid;
+
+	/* There must be enough records before any of them is inspected. */
+	if (desc->dtagd_nrecs < NREC) {
+		return (-1);
+	}
+
+	/* Check action type */
+	if (rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR ||
+	    rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR ||
+	    rec[REC_TYPE].dtrd_action != DTRACEACT_DIFEXPR ||
+	    !DTRACEACT_ISAGG(rec[REC_AGG].dtrd_action)) {
+		return (-1);
+	}
+
+	pid = rec_get_value(base + rec[REC_PID].dtrd_offset,
+	    rec[REC_PID].dtrd_size);
+
+	if (SHOULD_IGNORE(pid)) {
+		return (0);
+	}
+
+	tid = rec_get_value(base + rec[REC_TID].dtrd_offset,
+	    rec[REC_TID].dtrd_size);
+
+	/* The cause is delivered by name; map it to its id. */
+	type = (char *)base + rec[REC_TYPE].dtrd_offset;
+	cause_id = lt_table_cause_from_name(type, 1, CAUSE_FLAG_SPECIAL);
+
+	agg_value = rec_get_value(base + rec[REC_AGG].dtrd_offset,
+	    rec[REC_AGG].dtrd_size);
+
+	lt_stat_update_cause(pid, tid, cause_id, stat_type, agg_value);
+
+	return (0);
+}
+
+/*
+ * Callback to process aggregation lt_sync_* (related to synchronization
+ * objects), in the snapshot.
+ *
+ * Expected records, in order: pid, tid, synchronization object type, wait
+ * channel, aggregated value. Returns -1 if the layout does not match, 0
+ * otherwise.
+ */
+static int
+aggwalk_sync(const dtrace_aggdata_t *data, lt_stat_type_t stat_type)
+{
+	enum { REC_PID = 1, REC_TID, REC_STYPE, REC_WCHAN, REC_AGG, NREC };
+	const dtrace_aggdesc_t *desc = data->dtada_desc;
+	const dtrace_recdesc_t *rec = desc->dtagd_rec;
+	caddr_t base = data->dtada_data;
+	unsigned long long wchan;
+	uint64_t agg_value;
+	int stype;
+	pid_t pid;
+	id_t tid;
+
+	/* There must be enough records before any of them is inspected. */
+	if (desc->dtagd_nrecs < NREC) {
+		return (-1);
+	}
+
+	/* Check action type */
+	if (rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR ||
+	    rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR ||
+	    rec[REC_STYPE].dtrd_action != DTRACEACT_DIFEXPR ||
+	    rec[REC_WCHAN].dtrd_action != DTRACEACT_DIFEXPR ||
+	    !DTRACEACT_ISAGG(rec[REC_AGG].dtrd_action)) {
+		return (-1);
+	}
+
+	pid = rec_get_value(base + rec[REC_PID].dtrd_offset,
+	    rec[REC_PID].dtrd_size);
+
+	if (SHOULD_IGNORE(pid)) {
+		return (0);
+	}
+
+	tid = rec_get_value(base + rec[REC_TID].dtrd_offset,
+	    rec[REC_TID].dtrd_size);
+	stype = rec_get_value(base + rec[REC_STYPE].dtrd_offset,
+	    rec[REC_STYPE].dtrd_size);
+	wchan = rec_get_value(base + rec[REC_WCHAN].dtrd_offset,
+	    rec[REC_WCHAN].dtrd_size);
+	agg_value = rec_get_value(base + rec[REC_AGG].dtrd_offset,
+	    rec[REC_AGG].dtrd_size);
+
+	lt_stat_update_sobj(pid, tid, stype, wchan, stat_type, agg_value);
+
+	return (0);
+}
+
+/*
+ * Callback for dtrace_aggregate_walk(). The aggregation name is split at
+ * '_' into "lt", a kind ("call", "named" or "sync") and a statistic
+ * ("count", "sum" or "max"); the matching aggwalk_* function is then called
+ * with the matching statistic type. Aggregations whose name does not follow
+ * this scheme are not processed.
+ *
+ * The entry is always removed from DTrace afterwards.
+ */
+/* ARGSUSED */
+static int
+aggwalk(const dtrace_aggdata_t *data, void *arg)
+{
+	static const struct {
+		const char *kind;
+		int (*handler)(const dtrace_aggdata_t *, lt_stat_type_t);
+	} kinds[] = {
+		{ "call", aggwalk_call },
+		{ "named", aggwalk_named },
+		{ "sync", aggwalk_sync }
+	};
+	static const struct {
+		const char *suffix;
+		lt_stat_type_t type;
+	} stats[] = {
+		{ "count", LT_STAT_COUNT },
+		{ "sum", LT_STAT_SUM },
+		{ "max", LT_STAT_MAX }
+	};
+	int (*handler)(const dtrace_aggdata_t *, lt_stat_type_t) = NULL;
+	char name[32];
+	char *token;
+	size_t i;
+
+	/* Work on a private, always terminated copy; strtok modifies it. */
+	(void) strncpy(name, data->dtada_desc->dtagd_name, sizeof (name));
+	name[sizeof (name) - 1] = '\0';
+
+	/* First component: the "lt" prefix */
+	token = strtok(name, "_");
+	if (token == NULL || strcmp(token, "lt") != 0) {
+		return (DTRACE_AGGWALK_REMOVE);
+	}
+
+	/* Second component: which kind of aggregation this is */
+	token = strtok(NULL, "_");
+	if (token == NULL) {
+		return (DTRACE_AGGWALK_REMOVE);
+	}
+
+	for (i = 0; i < sizeof (kinds) / sizeof (kinds[0]); i++) {
+		if (strcmp(token, kinds[i].kind) == 0) {
+			handler = kinds[i].handler;
+			break;
+		}
+	}
+
+	if (handler == NULL) {
+		return (DTRACE_AGGWALK_REMOVE);
+	}
+
+	/* Third component: which statistic the value represents */
+	token = strtok(NULL, "_");
+	if (token == NULL) {
+		return (DTRACE_AGGWALK_REMOVE);
+	}
+
+	for (i = 0; i < sizeof (stats) / sizeof (stats[0]); i++) {
+		if (strcmp(token, stats[i].suffix) == 0) {
+			(void) handler(data, stats[i].type);
+			break;
+		}
+	}
+
+	/* We have our data, so remove it from DTrace now */
+	return (DTRACE_AGGWALK_REMOVE);
+}
+
+/*
+ * Drop handler installed with dtrace_handle_drop(): report the drop message
+ * to the user and let DTrace carry on.
+ */
+/*ARGSUSED*/
+static int
+drop_handler(const dtrace_dropdata_t *data, void *user)
+{
+	const char *reason = data->dtdda_msg;
+
+	lt_display_error("Drop: %s\n", reason);
+
+	/* A drop is not fatal for us; tell DTrace it has been handled. */
+	return (DTRACE_HANDLE_OK);
+}
+
+#ifndef EMBED_CONFIGS
+/*
+ * Copy the content from a "real" file into a temp file.
+ *
+ * src is the path of the file to read; dst is an already opened stream the
+ * content is written to at its current position. dst is neither rewound nor
+ * closed here.
+ *
+ * Returns 0 on success, -1 if src cannot be opened or if reading or writing
+ * fails. The source stream is closed on every path.
+ */
+static int
+copy_tmp_file(const char *src, FILE *dst)
+{
+	FILE *tmp = NULL;
+	char buffer[256];
+	size_t bytes;
+	int ret = 0;
+
+	if ((tmp = fopen(src, "r")) == NULL) {
+		return (-1);
+	}
+
+	while ((bytes = fread(buffer, 1, sizeof (buffer), tmp)) > 0) {
+		if (fwrite(buffer, 1, bytes, dst) != bytes) {
+			ret = -1;
+			break;
+		}
+	}
+
+	/* fread() returns 0 for both EOF and error; tell them apart. */
+	if (ferror(tmp)) {
+		ret = -1;
+	}
+
+	(void) fclose(tmp);
+
+	return (ret);
+}
+#endif
+
+/*
+ * DTrace initialization. D script starts running when this function returns.
+ *
+ * Steps: open libdtrace, install the drop handler, pass the enabled features
+ * to the D compiler as preprocessor defines, assemble the D script (main
+ * script plus whatever lt_table_append_trans() appends) in a temp file,
+ * compile it, enable its probes and start tracing.
+ *
+ * Returns 0 on success and a non-zero value on failure; an error message
+ * has been reported through lt_display_error() for the failures detected
+ * directly in this function.
+ */
+int
+lt_dtrace_init(void)
+{
+	dtrace_prog_t *prog;
+	dtrace_proginfo_t info;
+	int err;
+	FILE *fp_script = NULL;
+
+	pid_self = getpid();
+
+	if ((g_dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL) {
+		/* No handle yet, so the error comes back through err. */
+		lt_display_error("Cannot open dtrace library: %s\n",
+		    dtrace_errmsg(NULL, err));
+		return (-1);
+	}
+
+	if (dtrace_handle_drop(g_dtp, &drop_handler, NULL) == -1) {
+		/* From here on errors are kept in the handle itself. */
+		lt_display_error("Cannot install DTrace handle: %s\n",
+		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
+		return (-1);
+	}
+
+	if (g_config.lt_cfg_enable_filter) {
+		if ((err = dtrace_setopt(g_dtp, "define",
+		    "ENABLE_FILTER")) != 0) {
+			lt_display_error(
+			    "Failed to set option ENABLE_FILTER.\n");
+			return (err);
+		}
+	}
+
+	if (g_config.lt_cfg_trace_syncobj) {
+		if ((err = dtrace_setopt(g_dtp, "define",
+		    "ENABLE_SYNCOBJ")) != 0) {
+			lt_display_error(
+			    "Failed to set option ENABLE_SYNCOBJ.\n");
+			return (err);
+		}
+	}
+
+	if (g_config.lt_cfg_trace_sched) {
+		if ((err = dtrace_setopt(g_dtp, "define",
+		    "ENABLE_SCHED")) != 0) {
+			lt_display_error(
+			    "Failed to set option ENABLE_SCHED.\n");
+			return (err);
+		}
+	}
+
+	if (g_config.lt_cfg_low_overhead_mode) {
+		if ((err = dtrace_setopt(g_dtp, "define",
+		    "ENABLE_LOW_OVERHEAD")) != 0) {
+			lt_display_error(
+			    "Failed to set option ENABLE_LOW_OVERHEAD.\n");
+			return (err);
+		}
+	}
+
+	/* Create a temp file; libdtrace needs it for cpp(1) */
+	if ((fp_script = tmpfile()) == NULL) {
+		lt_display_error("Cannot create tmp file\n");
+		return (-1);
+	}
+
+	/* Copy the main D script into the temp file */
+#ifdef EMBED_CONFIGS
+	if (fwrite(&latencytop_d_start,
+	    (size_t)(&latencytop_d_end - &latencytop_d_start), 1, fp_script)
+	    != 1) {
+		lt_display_error("Could not copy D script, fwrite() failed\n");
+		(void) fclose(fp_script);
+		return (-1);
+	}
+#else
+	if (copy_tmp_file(DEFAULT_D_SCRIPT_NAME, fp_script) != 0) {
+		lt_display_error("Cannot open script file %s\n",
+		    DEFAULT_D_SCRIPT_NAME);
+		(void) fclose(fp_script);
+		return (-1);
+	}
+#endif	/* EMBED_CONFIGS */
+
+	if (lt_table_append_trans(fp_script) != 0) {
+		(void) fclose(fp_script);
+		return (-1);
+	}
+
+	/* The compiler reads from the current position: start over. */
+	(void) fseek(fp_script, 0, SEEK_SET);
+
+	if ((prog = dtrace_program_fcompile(g_dtp, fp_script,
+	    DTRACE_C_CPP, 0, NULL)) == NULL) {
+		lt_display_error("Failed to compile D script.\n");
+		(void) fclose(fp_script);
+		return (dtrace_errno(g_dtp));
+	}
+
+	(void) fclose(fp_script);
+
+	/* Execute the D script */
+	if (dtrace_program_exec(g_dtp, prog, &info) == -1) {
+		lt_display_error("Failed to enable probes.\n");
+		return (dtrace_errno(g_dtp));
+	}
+
+	if (dtrace_go(g_dtp) != 0) {
+		lt_display_error("Failed to run D script.\n");
+		return (dtrace_errno(g_dtp));
+	}
+
+	return (0);
+}
+
+/*
+ * Periodic worker: check the DTrace status and snapshot the aggregations so
+ * that the data is moved to user space and the kernel does not run out of
+ * memory.
+ *
+ * Unless force is non-zero, nothing is done while less than the configured
+ * snapshot interval has passed since the last snapshot; the remaining time
+ * in milliseconds is returned in that case. Otherwise returns 0 after a
+ * successful snapshot and -1 on failure.
+ */
+int
+lt_dtrace_work(int force)
+{
+	static uint64_t last_snap = 0;
+	const uint64_t now = lt_millisecond();
+
+	if (!force) {
+		const uint64_t elapsed = now - last_snap;
+
+		if (elapsed < g_config.lt_cfg_snap_interval) {
+			/* Too early; report how long is left to wait. */
+			return (last_snap + g_config.lt_cfg_snap_interval -
+			    now);
+		}
+	}
+
+	if (dtrace_status(g_dtp) == -1) {
+		lt_display_error("Failed when getting status: %s\n",
+		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
+		return (-1);
+	}
+
+	if (dtrace_aggregate_snap(g_dtp) != 0) {
+		lt_display_error("Failed to snap aggregate: %s\n",
+		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
+		return (-1);
+	}
+
+	last_snap = now;
+
+	return (0);
+}
+
+/*
+ * Take a fresh snapshot and walk all aggregations, feeding them through
+ * aggwalk() into latencytop's own statistics. Called immediately before
+ * the UI is updated.
+ *
+ * Returns 0 on success, -1 if the snapshot or the walk failed.
+ */
+int
+lt_dtrace_collect(void)
+{
+	int rc = -1;
+
+	/* Force a snapshot regardless of the snapshot interval. */
+	if (lt_dtrace_work(1) == 0) {
+		if (dtrace_aggregate_walk(g_dtp, aggwalk, NULL) == 0) {
+			/*
+			 * aggwalk() already removed every entry it saw, so
+			 * this clear is most likely redundant; it is kept
+			 * as a safety net.
+			 */
+			dtrace_aggregate_clear(g_dtp);
+			rc = 0;
+		} else {
+			lt_display_error("Failed to sort aggregate: %s\n",
+			    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
+		}
+	}
+
+	return (rc);
+}
+
+/*
+ * dtrace clean up: stop tracing and close the libdtrace handle.
+ *
+ * Does nothing if no handle is open, and forgets the handle once it has
+ * been closed, so calling this twice (or without a successful
+ * dtrace_open()) is harmless.
+ */
+void
+lt_dtrace_deinit(void)
+{
+	if (g_dtp == NULL) {
+		return;
+	}
+
+	(void) dtrace_stop(g_dtp);
+	dtrace_close(g_dtp);
+	g_dtp = NULL;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/latencytop/common/klog.c	Mon Sep 28 13:53:34 2009 -0700
@@ -0,0 +1,226 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2008-2009, Intel Corporation.
+ * All Rights Reserved.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <string.h>
+#include <procfs.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <unistd.h>
+
+#include "latencytop.h"
+
+static GHashTable *proc_table = NULL; /* pid -> char * */
+static GHashTable *klog_table = NULL; /* char * -> uint64_t total */
+static char klog_filename[PATH_MAX] = DEFAULT_KLOG_FILE;
+static int klog_level = LT_KLOG_LEVEL_NONE;
+
+/*
+ * g_hash_table_foreach() callback for proc_table: print one
+ * "pid, command" line. The key is the pid stored directly in the pointer.
+ */
+static void
+print_proc(void *key, const char *args, FILE *fp)
+{
+	const long pid = (long)key;
+
+	(void) fprintf(fp, "%-8ld \"%s\"\n", pid, args);
+}
+
+/*
+ * g_hash_table_foreach() callback for klog_table: print the total, count
+ * and max of one entry, followed by its key.
+ */
+static void
+print_stat(const char *key, lt_stat_data_t *log, FILE *fp)
+{
+	const long long total = (long long)log->lt_s_total;
+	const long long count = (long long)log->lt_s_count;
+	const long long max = (long long)log->lt_s_max;
+
+	(void) fprintf(fp, "%lld, %lld, %lld, %s\n", total, count, max, key);
+}
+
+/*
+ * Initialization for kernel logging: create the two hash tables used by
+ * the logger. Does nothing if either table already exists.
+ */
+void
+lt_klog_init(void)
+{
+	if (klog_table == NULL && proc_table == NULL) {
+		/* String keys and values; both are freed with the table. */
+		klog_table = g_hash_table_new_full(g_str_hash, g_str_equal,
+		    (GDestroyNotify)free, (GDestroyNotify)free);
+		lt_check_null(klog_table);
+
+		/* Keys are used directly; only the values are freed. */
+		proc_table = g_hash_table_new_full(g_direct_hash,
+		    g_direct_equal, NULL, (GDestroyNotify)free);
+		lt_check_null(proc_table);
+	}
+}
+
+/*
+ * Set log file path.
+ *
+ * The file is opened once in append mode to verify that it can be written
+ * to; if it did not exist before, the empty file created by that test is
+ * removed again.
+ *
+ * Returns 0 on success, -1 if filename is NULL or too long to be stored,
+ * and -2 if the file cannot be opened for writing.
+ */
+int
+lt_klog_set_log_file(const char *filename)
+{
+	FILE *fp;
+	int file_exist;
+
+	/* Report bad names to the caller instead of aborting. */
+	if (filename == NULL || strlen(filename) >= sizeof (klog_filename)) {
+		return (-1);
+	}
+
+	file_exist = lt_file_exist(filename);
+	/* Test if we can write to the file */
+	fp = fopen(filename, "a");
+
+	if (fp == NULL) {
+		return (-2);
+	}
+
+	(void) fclose(fp);
+	/* Don't leave empty file around */
+	if (!file_exist) {
+		(void) unlink(filename);
+	}
+
+	(void) strncpy(klog_filename, filename,
+	    sizeof (klog_filename));
+	klog_filename[sizeof (klog_filename) - 1] = '\0';
+
+	return (0);
+}
+
+/*
+ * Set log level. Accepts values from 0 up to LT_KLOG_LEVEL_ALL; returns 0
+ * when the level was stored and -1 when it is out of range.
+ */
+int
+lt_klog_set_log_level(int level)
+{
+	if (level >= 0 && level <= (int)LT_KLOG_LEVEL_ALL) {
+		klog_level = level;
+		return (0);
+	}
+
+	return (-1);
+}
+
+/*
+ * Append the current content of both tables to the log file: a timestamped
+ * header, the list of processes, then the statistics. Nothing is written
+ * when logging is disabled or the log file cannot be opened.
+ */
+void
+lt_klog_write(void)
+{
+	char timestamp[32];
+	FILE *out;
+
+	if (klog_level == LT_KLOG_LEVEL_NONE) {
+		return;
+	}
+
+	g_assert(klog_table != NULL && proc_table != NULL);
+
+	if ((out = fopen(klog_filename, "a")) == NULL) {
+		return;
+	}
+
+	lt_time_str(timestamp, sizeof (timestamp));
+	(void) fprintf(out, "# Log generated at %s by %s\n", timestamp,
+	    TITLE);
+
+	/* Section 1: one line per known process */
+	(void) fputs("# List of processes\n", out);
+	(void) fputs("PID, CMD\n", out);
+	g_hash_table_foreach(proc_table, (GHFunc)print_proc, out);
+
+	/* Section 2: one line per logged entry */
+	(void) fputs("# Statistics\n", out);
+	(void) fputs("TOTAL, COUNT, MAX, PID, KSTACK\n", out);
+	g_hash_table_foreach(klog_table, (GHFunc)print_stat, out);
+
+	(void) fclose(out);
+}
+
+/*
+ * Clean up: destroy both hash tables (if they exist) and forget them.
+ * Nothing is written to the log file here; see lt_klog_write() for that.
+ */
+void
+lt_klog_deinit(void)
+{
+	GHashTable **tables[] = { &klog_table, &proc_table };
+	size_t i;
+
+	for (i = 0; i < sizeof (tables) / sizeof (tables[0]); i++) {
+		if (*tables[i] == NULL) {
+			continue;
+		}
+
+		g_hash_table_destroy(*tables[i]);
+		*tables[i] = NULL;
+	}
+}
+
+/*
+ * Record a kernel stack and its statistics in the in-memory log; the log
+ * file itself is written by lt_klog_write().
+ *
+ * The entry is keyed by the string '<pid>, "<stack>"'. The command line of
+ * the process (or, failing that, its file name) is remembered the first
+ * time a pid is seen; if neither can be obtained the sample is dropped.
+ * Samples whose level is not enabled in klog_level are ignored, as is a
+ * NULL stack.
+ */
+/* ARGSUSED */
+void
+lt_klog_log(int level, pid_t pid, char *stack,
+	lt_stat_type_t type, uint64_t value)
+{
+	lt_stat_data_t *entry = NULL;
+	char *psargs;
+	char *str;
+	int str_len;
+
+	if ((level & klog_level) == 0 || stack == NULL) {
+		return;
+	}
+
+	g_assert(klog_table != NULL && proc_table != NULL);
+	psargs = (char *)g_hash_table_lookup(proc_table,
+	    LT_INT_TO_POINTER(pid));
+
+	if (psargs == NULL) {
+		psargs = lt_get_proc_field(pid, LT_FIELD_PSARGS);
+
+		if (psargs == NULL) {
+			psargs = lt_get_proc_field(pid, LT_FIELD_FNAME);
+		}
+
+		if (psargs == NULL) {
+			return;
+		}
+
+		/* proc_table takes ownership of psargs and frees it. */
+		g_hash_table_insert(proc_table,
+		    LT_INT_TO_POINTER(pid), psargs);
+	}
+
+	/*
+	 * Besides the stack we need room for a decimal long (at most 20
+	 * characters when long is 64 bits wide), the ", " separator, two
+	 * quotes and the terminating NUL; 32 covers all of that.
+	 */
+	str_len = strlen(stack) + 32;
+	str = lt_malloc(str_len);
+	/* pid_t is not necessarily a long; cast to match the format. */
+	(void) snprintf(str, str_len, "%ld, \"%s\"", (long)pid, stack);
+	entry = (lt_stat_data_t *)g_hash_table_lookup(klog_table, str);
+
+	if (entry == NULL) {
+		/* New key: klog_table takes ownership of str and entry. */
+		entry = (lt_stat_data_t *)lt_zalloc(sizeof (lt_stat_data_t));
+		g_hash_table_insert(klog_table, str, entry);
+	} else {
+		free(str);
+	}
+
+	lt_update_stat_value(entry, type, value);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/latencytop/common/latencytop.c	Mon Sep 28 13:53:34 2009 -0700
@@ -0,0 +1,486 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2008-2009, Intel Corporation.
+ * All Rights Reserved.
+ */
+
+#include <unistd.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <libgen.h>
+#include <signal.h>
+#include "latencytop.h"
+
+#define	CMPOPT(a, b)	strncmp((a), (b), sizeof (b))
+
+lt_config_t g_config;
+
+typedef enum {
+	LT_CMDOPT_INTERVAL,
+	LT_CMDOPT_LOG_FILE,
+	LT_CMDOPT_LOG_LEVEL,
+	LT_CMDOPT_LOG_INTERVAL,
+	LT_CMDOPT_CONFIG_FILE,
+	LT_CMDOPT_F_FILTER,
+	LT_CMDOPT_F_SCHED,
+	LT_CMDOPT_F_SOBJ,
+	LT_CMDOPT_F_LOW,
+	LT_CMDOPT__LAST	/* Must be last one */
+} lt_cmd_option_id_t;
+
+/*
+ * Remember the value given for a command line option and detect
+ * conflicting repetitions.
+ *
+ * The first value seen for an option id is recorded. Returns TRUE (after
+ * printing a message to stderr) if the option was seen before with a
+ * different value; returns FALSE for a first occurrence or an identical
+ * repetition.
+ */
+static int
+check_opt_dup(lt_cmd_option_id_t id, uint64_t value)
+{
+	/* One message per lt_cmd_option_id_t, in enum order. */
+	static const char *const messages[] = {
+		"-t is set more than once with different values.",
+		"-o is set more than once.",
+		"-k is set more than once with different values.",
+		"-l is set more than once with different values.",
+		"-c is set more than once.",
+		"-f [no]filter is set more than once with different values.",
+		"-f [no]sched is set more than once with different values.",
+		"-f [no]sobj is set more than once with different values.",
+		"-f [no]low is set more than once with different values.",
+	};
+	static int seen[(int)LT_CMDOPT__LAST];
+	static uint64_t first_value[(int)LT_CMDOPT__LAST];
+	const int idx = (int)id;
+
+	g_assert(sizeof (messages) / sizeof (messages[0]) ==
+	    (int)LT_CMDOPT__LAST);
+
+	if (seen[idx]) {
+		if (first_value[idx] == value) {
+			return (FALSE);
+		}
+
+		(void) fprintf(stderr, "%s\n", messages[idx]);
+		return (TRUE);
+	}
+
+	/* First occurrence: just remember it. */
+	seen[idx] = TRUE;
+	first_value[idx] = value;
+
+	return (FALSE);
+}
+
+/*
+ * Print command-line help.
+ *
+ * With long_help set, the full option list is printed to stdout; otherwise
+ * a one-line usage hint is printed to stderr. Only the base name of
+ * execname is shown.
+ */
+static void
+print_usage(const char *execname, int long_help)
+{
+	char path[PATH_MAX];
+	const char *name;
+
+	/* basename() may modify its argument, so work on a copy. */
+	(void) snprintf(path, sizeof (path), "%s", execname);
+	name = basename(path);
+
+	if (long_help) {
+		(void) printf("Usage: %s [option(s)]\n", name);
+		(void) printf("Options:\n"
+		    "    -h, --help\n"
+		    "        Print this help.\n"
+		    "    -t, --interval TIME\n"
+		    "        Set refresh interval to TIME. "
+		    "Valid range [1...60] seconds, default = 5\n"
+		/*
+		 * Option "-c, --config FILE" is not user-visible for now.
+		 * When we have chance to properly document the format of
+		 * translation rules, we'll make it user-visible.
+		 */
+		    "    -o, --output-log-file FILE\n"
+		    "        Output kernel log to FILE. Default = "
+		    DEFAULT_KLOG_FILE "\n"
+		    "    -k, --kernel-log-level LEVEL\n"
+		    "        Set kernel log level to LEVEL.\n"
+		    "        0(default) = None, 1 = Unmapped, 2 = Mapped, "
+		    "3 = All.\n"
+		    "    -f, --feature [no]feature1,[no]feature2,...\n"
+		    "        Enable/disable features in LatencyTOP.\n"
+		    "        [no]filter:\n"
+		    "        Filter large interruptible latencies, e.g. "
+		    "sleep.\n"
+		    "        [no]sched:\n"
+		    "        Monitors sched (PID=0).\n"
+		    "        [no]sobj:\n"
+		    "        Monitors synchronization objects.\n"
+		    "        [no]low:\n"
+		    "        Lower overhead by sampling small latencies.\n"
+		    "    -l, --log-period TIME\n"
+		    "        Write and restart log every TIME seconds, "
+		    "TIME >= 60\n");
+		return;
+	}
+
+	/* Print short help to stderr. */
+	(void) fprintf(stderr, "Usage: %s [option(s)], ", name);
+	(void) fprintf(stderr, "use '%s -h' for details.\n", name);
+}
+
+/*
+ * Properly exit latencytop when it receives SIGINT or SIGTERM.
+ *
+ * main() installs this handler for both signals. The handler itself only
+ * calls lt_gpipe_break("q"); presumably that feeds a "q" (quit) command
+ * into the pipe main() waits on, so that the regular shutdown path runs
+ * -- TODO confirm against lt_gpipe_break().
+ */
+/* ARGSUSED */
+static void
+signal_handler(int sig)
+{
+	lt_gpipe_break("q");
+}
+
+/*
+ * Convert a decimal string to an integer.
+ *
+ * Returns 0 and stores the value in *result on success. Returns -1, leaving
+ * *result untouched, if either argument is NULL, if the string contains no
+ * digits, if extra characters follow the number, or if the value does not
+ * fit in an int.
+ */
+static int
+to_int(const char *str, int *result)
+{
+	char *tail = NULL;
+	long ret;
+
+	if (str == NULL || result == NULL) {
+		return (-1);
+	}
+
+	ret = strtol(str, &tail, 10);
+
+	/* tail == str means that not a single digit was consumed. */
+	if (tail == NULL || tail == str || *tail != '\0') {
+		return (-1);
+	}
+
+	/* Refuse values that would be truncated by the cast below. */
+	if (ret < INT_MIN || ret > INT_MAX) {
+		return (-1);
+	}
+
+	*result = (int)ret;
+
+	return (0);
+}
+
+/*
+ * The main function.
+ *
+ * Parses the command line, initializes logging, the translation table and
+ * DTrace, collects data for one refresh interval and then alternates
+ * between collecting data and running the display loop until the display
+ * loop asks to stop. Returns 0 on success and 1 on a usage or
+ * initialization error.
+ */
+int
+main(int argc, char *argv[])
+{
+	const char *opt_string = "t:o:k:hf:l:c:";
+	struct option const longopts[] = {
+		{"interval", required_argument, NULL, 't'},
+		{"output-log-file", required_argument, NULL, 'o'},
+		{"kernel-log-level", required_argument, NULL, 'k'},
+		{"help", no_argument, NULL, 'h'},
+		{"feature", required_argument, NULL, 'f'},
+		{"log-period", required_argument, NULL, 'l'},
+		{"config", required_argument, NULL, 'c'},
+		{NULL, 0, NULL, 0}
+	};
+
+	int optc;
+	int longind = 0;
+	int running = 1;
+	int unknown_option = FALSE;
+	int refresh_interval = 5;
+	int klog_level = 0;
+	int log_interval = 0;
+	long long last_logged = 0;
+	char *token = NULL;
+	int retval = 0;
+	int gpipe;
+	int err;
+	uint64_t collect_end;
+	uint64_t current_time;
+	uint64_t delta_time;
+	char logfile[PATH_MAX] = "";
+
+	lt_gpipe_init();
+	(void) signal(SIGINT, signal_handler);
+	(void) signal(SIGTERM, signal_handler);
+
+	/* Default global settings */
+	g_config.lt_cfg_enable_filter = 0;
+	g_config.lt_cfg_trace_sched = 0;
+	g_config.lt_cfg_trace_syncobj = 1;
+	g_config.lt_cfg_low_overhead_mode = 0;
+	/* dtrace snapshot every 1 second */
+	g_config.lt_cfg_snap_interval = 1000;
+#ifdef EMBED_CONFIGS
+	g_config.lt_cfg_config_name = NULL;
+#else
+	g_config.lt_cfg_config_name = lt_strdup(DEFAULT_CONFIG_NAME);
+#endif
+
+	/* Parse command line arguments. */
+	while ((optc = getopt_long(argc, argv, opt_string,
+	    longopts, &longind)) != -1) {
+		switch (optc) {
+		case 'h':
+			print_usage(argv[0], TRUE);
+			goto end_none;
+		case 't':
+			if (to_int(optarg, &refresh_interval) != 0 ||
+			    refresh_interval < 1 || refresh_interval > 60) {
+				lt_display_error(
+				    "Invalid refresh interval: %s\n", optarg);
+				unknown_option = TRUE;
+			} else if (check_opt_dup(LT_CMDOPT_INTERVAL,
+			    refresh_interval)) {
+				unknown_option = TRUE;
+			}
+
+			break;
+		case 'k':
+			if (to_int(optarg, &klog_level) != 0 ||
+			    lt_klog_set_log_level(klog_level) != 0) {
+				lt_display_error(
+				    "Invalid log level: %s\n", optarg);
+				unknown_option = TRUE;
+			} else if (check_opt_dup(LT_CMDOPT_LOG_LEVEL,
+			    klog_level)) {
+				/* Compare log levels, not the interval. */
+				unknown_option = TRUE;
+			}
+
+			break;
+		case 'o':
+			/*
+			 * optind differs for every occurrence, so any
+			 * repetition of -o is reported as a duplicate.
+			 */
+			if (check_opt_dup(LT_CMDOPT_LOG_FILE, optind)) {
+				unknown_option = TRUE;
+			} else if (strlen(optarg) >= sizeof (logfile)) {
+				lt_display_error(
+				    "Log file name is too long: %s\n",
+				    optarg);
+				unknown_option = TRUE;
+			} else {
+				(void) strncpy(logfile, optarg,
+				    sizeof (logfile));
+			}
+
+			break;
+		case 'f':
+			/* Comma separated list of [no]feature names */
+			for (token = strtok(optarg, ","); token != NULL;
+			    token = strtok(NULL, ",")) {
+				int v = TRUE;
+
+				if (strncmp(token, "no", 2) == 0) {
+					v = FALSE;
+					token = &token[2];
+				}
+
+				if (CMPOPT(token, "filter") == 0) {
+					if (check_opt_dup(LT_CMDOPT_F_FILTER,
+					    v)) {
+						unknown_option = TRUE;
+					} else {
+						g_config.lt_cfg_enable_filter
+						    = v;
+					}
+				} else if (CMPOPT(token, "sched") == 0) {
+					if (check_opt_dup(LT_CMDOPT_F_SCHED,
+					    v)) {
+						unknown_option = TRUE;
+					} else {
+						g_config.lt_cfg_trace_sched
+						    = v;
+					}
+				} else if (CMPOPT(token, "sobj") == 0) {
+					if (check_opt_dup(LT_CMDOPT_F_SOBJ,
+					    v)) {
+						unknown_option = TRUE;
+					} else {
+						g_config.lt_cfg_trace_syncobj
+						    = v;
+					}
+				} else if (CMPOPT(token, "low") == 0) {
+					if (check_opt_dup(LT_CMDOPT_F_LOW,
+					    v)) {
+						unknown_option = TRUE;
+					} else {
+						g_config.
+						    lt_cfg_low_overhead_mode
+						    = v;
+					}
+				} else {
+					lt_display_error(
+					    "Unknown feature: %s\n", token);
+					unknown_option = TRUE;
+				}
+			}
+
+			break;
+		case 'l':
+			if (to_int(optarg, &log_interval) != 0 ||
+			    log_interval < 60) {
+				lt_display_error(
+				    "Invalid log interval: %s\n", optarg);
+				unknown_option = TRUE;
+			} else if (check_opt_dup(LT_CMDOPT_LOG_INTERVAL,
+			    log_interval)) {
+				unknown_option = TRUE;
+			}
+
+			break;
+		case 'c':
+			if (strlen(optarg) >= PATH_MAX) {
+				lt_display_error(
+				    "Configuration name is too long.\n");
+				unknown_option = TRUE;
+			} else if (check_opt_dup(LT_CMDOPT_CONFIG_FILE,
+			    optind)) {
+				unknown_option = TRUE;
+			} else {
+				/* Do not leak the default name. */
+				free(g_config.lt_cfg_config_name);
+				g_config.lt_cfg_config_name =
+				    lt_strdup(optarg);
+			}
+
+			break;
+		default:
+			unknown_option = TRUE;
+			break;
+		}
+	}
+
+	if (!unknown_option && strlen(logfile) > 0) {
+		err = lt_klog_set_log_file(logfile);
+
+		if (err == -1) {
+			lt_display_error("Log file name is too long: %s\n",
+			    logfile);
+			unknown_option = TRUE;
+		} else if (err == -2) {
+			lt_display_error("Cannot write to log file: %s\n",
+			    logfile);
+			unknown_option = TRUE;
+		}
+	}
+
+	/* Throw error for invalid/junk arguments */
+	if (optind  < argc) {
+		int tmpind = optind;
+		(void) fprintf(stderr, "Unknown option(s): ");
+
+		while (tmpind < argc) {
+			(void) fprintf(stderr, "%s ", argv[tmpind++]);
+		}
+
+		(void) fprintf(stderr, "\n");
+		unknown_option = TRUE;
+	}
+
+	if (unknown_option) {
+		print_usage(argv[0], FALSE);
+		retval = 1;
+		goto end_none;
+	}
+
+	(void) printf("%s\n%s\n", TITLE, COPYRIGHT);
+
+	/*
+	 * Initialization
+	 */
+	lt_klog_init();
+
+	if (lt_table_init() != 0) {
+		lt_display_error("Unable to load configuration table.\n");
+		retval = 1;
+		goto end_notable;
+	}
+
+	if (lt_dtrace_init() != 0) {
+		lt_display_error("Unable to initialize dtrace.\n");
+		retval = 1;
+		goto end_nodtrace;
+	}
+
+	last_logged = lt_millisecond();
+
+	(void) printf("Collecting data for %d seconds...\n",
+	    refresh_interval);
+
+	/*
+	 * Initial collection period: wait for one refresh interval before
+	 * the display is started, waking up at least once per snapshot
+	 * interval to let lt_dtrace_work() do its periodic work. Any data
+	 * arriving on the pipe ends the program.
+	 */
+	gpipe = lt_gpipe_readfd();
+	collect_end = last_logged + refresh_interval * 1000;
+	for (;;) {
+		fd_set read_fd;
+		struct timeval timeout;
+		int tsleep = collect_end - lt_millisecond();
+
+		if (tsleep <= 0) {
+			break;
+		}
+
+		/* Both values are in milliseconds already. */
+		if (tsleep > g_config.lt_cfg_snap_interval) {
+			tsleep = g_config.lt_cfg_snap_interval;
+		}
+
+		timeout.tv_sec = tsleep / 1000;
+		timeout.tv_usec = (tsleep % 1000) * 1000;
+
+		FD_ZERO(&read_fd);
+		FD_SET(gpipe, &read_fd);
+
+		if (select(gpipe + 1, &read_fd, NULL, NULL, &timeout) > 0) {
+			goto end_ubreak;
+		}
+
+		(void) lt_dtrace_work(0);
+	}
+
+	lt_display_init();
+
+	do {
+		current_time = lt_millisecond();
+
+		lt_stat_clear_all();
+		(void) lt_dtrace_collect();
+
+		/* How long did collecting the data take? */
+		delta_time = current_time;
+		current_time = lt_millisecond();
+		delta_time = current_time - delta_time;
+
+		if (log_interval > 0 &&
+		    current_time - last_logged > log_interval * 1000) {
+			lt_klog_write();
+			last_logged = current_time;
+		}
+
+		/* Show the data for the rest of the refresh interval. */
+		running = lt_display_loop(refresh_interval * 1000 -
+		    delta_time);
+	} while (running != 0);
+
+	lt_klog_write();
+
+	/* Cleanup */
+	lt_display_deinit();
+
+end_ubreak:
+	lt_dtrace_deinit();
+	lt_stat_free_all();
+
+end_nodtrace:
+	lt_table_deinit();
+
+end_notable:
+	lt_klog_deinit();
+
+end_none:
+	lt_gpipe_deinit();
+
+	if (g_config.lt_cfg_config_name != NULL) {
+		free(g_config.lt_cfg_config_name);
+	}
+
+	return (retval);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/latencytop/common/latencytop.d	Mon Sep 28 13:53:34 2009 -0700
@@ -0,0 +1,404 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2008-2009, Intel Corporation.
+ * All Rights Reserved.
+ */
+
+#define	MAX_TAG		8
+#define	MAX_STACK	64
+
+#pragma D option aggsize=8m
+#pragma D option bufsize=16m
+#pragma D option dynvarsize=16m
+#pragma D option aggrate=0
+#pragma D option stackframes=MAX_STACK
+/*
+ * Our D script needs to compile even if some of the TRANSLATE probes cannot
+ * be found (older kernel, different architecture, unloaded modules etc.).
+ * zdefs permits probe descriptions that do not match any probe.
+ */
+#pragma D option zdefs
+
+#if defined(ENABLE_SCHED)
+#define TRACE_FILTER
+#define TRACE_FILTER_COND(a)	/ (a) /
+#else
+#define TRACE_FILTER	/ pid != 0 /
+#define TRACE_FILTER_COND(a)	/ pid != 0 && (a) /
+#endif
+
+/* Filter drops WAKEABLE latencies longer than this many ns (5ms). */
+#define FILTER_THRESHOLD	5000000
+/* t_flag bit for an interruptible sleep; value copied from thread.h */
+#define T_WAKEABLE		2
+
+/*
+ * Indexed by [pid, lwpid]: first the time a sleeping thread went off CPU,
+ * then the time it was enqueued to a dispatch queue. A global array is
+ * used because self-> is not accessible when enqueue happens.
+ */
+unsigned long long lt_timestamps[int, int];
+
+self unsigned int lt_is_block_wakeable;
+self unsigned long long lt_sleep_start;
+self unsigned long long lt_sleep_duration;
+self unsigned long long lt_sch_delay;
+self unsigned int lt_counter;		/* only used in low overhead */
+self unsigned long long lt_timestamp;	/* only used in low overhead */
+self unsigned int lt_stackp;		/* number of tags pushed */
+self unsigned int lt_prio[int];		/* tag stack: priorities */
+self string lt_cause[int];		/* tag stack: causes */
+
+this unsigned int priority;
+this string cause;
+
+/*
+ * Clean up everything, otherwise we will run out of memory.
+ */
+proc:::lwp-exit
+{
+	lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid] = 0;
+
+	self->lt_sleep_start = 0;
+	self->lt_sleep_duration = 0;
+	self->lt_sch_delay = 0;
+	self->lt_is_block_wakeable = 0;
+	self->lt_counter = 0;
+	self->lt_timestamp = 0;
+	/* Tags may remain pushed: exit syscalls never fire a return probe. */
+	self->lt_stackp = 0;
+
+	/* No way to clear an associative array: clear 0 ~ (MAX_TAG-1). */
+	self->lt_prio[0] = 0;
+	self->lt_prio[1] = 0;
+	self->lt_prio[2] = 0;
+	self->lt_prio[3] = 0;
+	self->lt_prio[4] = 0;
+	self->lt_prio[5] = 0;
+	self->lt_prio[6] = 0;
+	self->lt_prio[7] = 0;
+
+	self->lt_cause[0] = 0;
+	self->lt_cause[1] = 0;
+	self->lt_cause[2] = 0;
+	self->lt_cause[3] = 0;
+	self->lt_cause[4] = 0;
+	self->lt_cause[5] = 0;
+	self->lt_cause[6] = 0;
+	self->lt_cause[7] = 0;
+}
+
+#if !defined(ENABLE_LOW_OVERHEAD)
+/*
+ * Log timestamp when a thread is taken off the CPU in the SSLEEP state.
+ */
+sched::resume:off-cpu
+TRACE_FILTER_COND(curlwpsinfo->pr_state == SSLEEP)
+{
+	self->lt_sleep_start = timestamp;
+	self->lt_is_block_wakeable = curthread->t_flag & T_WAKEABLE;
+	/* Also publish it globally so that sched:::enqueue can find it. */
+	lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid] =
+	    self->lt_sleep_start;
+}
+
+/*
+ * Log timestamp when a thread that was logged off-cpu becomes runnable.
+ */
+sched:::enqueue
+/lt_timestamps[args[1]->pr_pid, args[0]->pr_lwpid] != 0/
+{
+	lt_timestamps[args[1]->pr_pid, args[0]->pr_lwpid] = timestamp;
+}
+
+/*
+ * Calculate latency when the thread is actually on the CPU (this is needed
+ * in order to get the right stack). Without an enqueue time, sleep ends now.
+ */
+this unsigned long long end;	/* time the sleep ended */
+this unsigned long long now;	/* time back on the CPU */
+sched::resume:on-cpu
+/self->lt_sleep_start != 0/
+{
+	this->end = lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid];
+	this->now = timestamp;
+	lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid] = 0;
+	this->end = (this->end != 0 && this->end != self->lt_sleep_start)
+	    ? this->end : this->now;
+
+	self->lt_sch_delay = this->now - this->end;
+	self->lt_sleep_duration = this->end - self->lt_sleep_start;
+	self->lt_sleep_start = 0;
+}
+
+/*
+ * Filter: drop every latency above FILTER_THRESHOLD when the sleep was
+ * interruptible (T_WAKEABLE), e.g. sleep().
+ */
+#if defined(ENABLE_FILTER)
+sched::resume:on-cpu
+/self->lt_sleep_duration > FILTER_THRESHOLD &&
+  self->lt_is_block_wakeable != 0/
+{
+	self->lt_sch_delay = 0;
+	self->lt_sleep_duration = 0;
+	self->lt_is_block_wakeable = 0;
+}
+#endif /* defined(ENABLE_FILTER) */
+
+/*
+ * Write sleep time, keyed by pid, tid, stack and the topmost tag.
+ * lt_sleep_duration is the duration between the time when a thread is taken
+ * off the CPU and the time when it is enqueued again.
+ */
+sched::resume:on-cpu
+/self->lt_sleep_duration != 0/
+{
+	this->cause = self->lt_stackp > 0 ?
+	    self->lt_cause[self->lt_stackp - 1] : "";
+	this->priority = self->lt_stackp > 0 ?
+	    self->lt_prio[self->lt_stackp - 1] : 0;
+
+	@lt_call_count[pid, tid, stack(), this->cause,
+	    this->priority] = count();
+	@lt_call_sum[pid, tid, stack(), this->cause,
+	    this->priority] = sum(self->lt_sleep_duration);
+	@lt_call_max[pid, tid, stack(),  this->cause,
+	    this->priority] = max(self->lt_sleep_duration);
+
+	self->lt_is_block_wakeable = 0;	/* Clear the flag to avoid leak */
+	self->lt_sleep_duration = 0;
+}
+
+/*
+ * Write time spent in queue to the "named" aggregations.
+ * lt_sch_delay is the interval between the time when a thread becomes
+ * runnable and the time when it is actually on the CPU.
+ */
+sched::resume:on-cpu
+/self->lt_sch_delay != 0/
+{
+	@lt_named_count[pid, tid, "Wait for available CPU"] = count();
+	@lt_named_sum[pid, tid, "Wait for available CPU"] =
+	    sum(self->lt_sch_delay);
+	@lt_named_max[pid, tid, "Wait for available CPU"] =
+	    max(self->lt_sch_delay);
+
+	self->lt_sch_delay = 0;
+}
+
+/*
+ * Probes to track latency caused by spinning on a lock (arg1 is recorded).
+ */
+lockstat:::adaptive-spin
+TRACE_FILTER
+{
+	@lt_named_count[pid, tid, "Adapt. lock spin"] = count();
+	@lt_named_sum[pid, tid, "Adapt. lock spin"] = sum(arg1);
+	@lt_named_max[pid, tid, "Adapt. lock spin"] = max(arg1);
+}
+
+lockstat:::spin-spin
+TRACE_FILTER
+{
+	@lt_named_count[pid, tid, "Spinlock spin"] = count();
+	@lt_named_sum[pid, tid, "Spinlock spin"] = sum(arg1);
+	@lt_named_max[pid, tid, "Spinlock spin"] = max(arg1);
+}
+
+/*
+ * Latency from blocking on a lock ('#' presumably = hide in summary).
+ */
+lockstat:::adaptive-block
+TRACE_FILTER
+{
+	@lt_named_count[pid, tid, "#Adapt. lock block"] = count();
+	@lt_named_sum[pid, tid, "#Adapt. lock block"] = sum(arg1);
+	@lt_named_max[pid, tid, "#Adapt. lock block"] = max(arg1);
+}
+
+lockstat:::rw-block
+TRACE_FILTER
+{
+	@lt_named_count[pid, tid, "#RW. lock block"] = count();
+	@lt_named_sum[pid, tid, "#RW. lock block"] = sum(arg1);
+	@lt_named_max[pid, tid, "#RW. lock block"] = max(arg1);
+}
+
+#if defined(ENABLE_SYNCOBJ)
+/*
+ * Track latency per synchronization object (its type and wait channel).
+ */
+this int stype;
+this unsigned long long wchan;
+this unsigned long long wtime;
+
+sched:::wakeup
+/*
+ * Currently we are unable to track wakeup from sched, because all its LWP IDs
+ * are zero when we trace it and that makes lt_timestamps unusable.
+ */
+/args[1]->pr_pid != 0 &&
+    lt_timestamps[args[1]->pr_pid, args[0]->pr_lwpid] != 0/
+{
+	this->stype = args[0]->pr_stype;
+	this->wchan = args[0]->pr_wchan;
+	/*
+	 * We can use lt_timestamps[] here, because
+	 * wakeup is always fired before enqueue.
+	 * After enqueue, lt_timestamps[] will be overwritten.
+	 */
+	this->wtime = timestamp - lt_timestamps[args[1]->pr_pid,
+	    args[0]->pr_lwpid];
+
+	@lt_sync_count[args[1]->pr_pid, args[0]->pr_lwpid, this->stype,
+	    this->wchan] = count();
+	@lt_sync_sum[args[1]->pr_pid, args[0]->pr_lwpid, this->stype,
+	    this->wchan] = sum(this->wtime);
+	@lt_sync_max[args[1]->pr_pid, args[0]->pr_lwpid, this->stype,
+	    this->wchan] = max(this->wtime);
+}
+#endif /* defined(ENABLE_SYNCOBJ) */
+
+#else /* !defined(ENABLE_LOW_OVERHEAD) */
+
+/*
+ * This is the low overhead mode.
+ * In order to reduce the number of instructions executed during each
+ * off-cpu and on-cpu event, we do the following:
+ *
+ *	1. Use sampling and update aggregations only roughly 1/100 times
+ *		(SAMPLE_TIMES), except above SAMPLE_THRESHOLD nanoseconds.
+ *	2. Do not track anything other than what is needed for "main" window.
+ *	3. Use as few thread local variables as possible.
+ */
+
+#define SAMPLE_TIMES		100
+#define SAMPLE_THRESHOLD	50000000
+
+/*
+ * Log timestamp when a thread goes off CPU in the SSLEEP state.
+ */
+sched::resume:off-cpu
+TRACE_FILTER_COND(curlwpsinfo->pr_state == SSLEEP)
+{
+	self->lt_timestamp = timestamp;
+#if defined(ENABLE_FILTER)
+	self->lt_is_block_wakeable = curthread->t_flag & T_WAKEABLE;
+#endif /* defined(ENABLE_FILTER) */
+}
+
+/*
+ * Calculate latency when a thread is actually on the CPU.
+ */
+this int need_skip;
+sched::resume:on-cpu
+/self->lt_timestamp != 0/
+{
+	self->lt_timestamp = timestamp - self->lt_timestamp;
+
+#if defined(ENABLE_FILTER)
+	self->lt_timestamp =
+	    (self->lt_timestamp > FILTER_THRESHOLD &&
+	    self->lt_is_block_wakeable != 0) ? 0 : self->lt_timestamp;
+	self->lt_is_block_wakeable = 0;
+#endif /* defined(ENABLE_FILTER) */
+	/* Skip and count small samples until SAMPLE_TIMES is reached. */
+	this->need_skip = (self->lt_counter < (SAMPLE_TIMES - 1) &&
+	    self->lt_timestamp <= SAMPLE_THRESHOLD) ? 1 : 0;
+	self->lt_timestamp = this->need_skip ? 0 : self->lt_timestamp;
+	self->lt_counter += this->need_skip;
+}
+
+/*
+ * Track large latency (above SAMPLE_THRESHOLD) first; it counts once.
+ */
+sched::resume:on-cpu
+/self->lt_timestamp > SAMPLE_THRESHOLD/
+{
+	this->cause = self->lt_stackp > 0 ?
+	    self->lt_cause[self->lt_stackp - 1] : "";
+	this->priority = self->lt_stackp > 0 ?
+	    self->lt_prio[self->lt_stackp - 1] : 0;
+
+	@lt_call_count[pid, tid, stack(), this->cause,
+	    this->priority] = sum(1);
+	@lt_call_sum[pid, tid, stack(), this->cause,
+	    this->priority] = sum(self->lt_timestamp);
+	@lt_call_max[pid, tid, stack(), this->cause,
+	    this->priority] = max(self->lt_timestamp);
+
+	self->lt_timestamp = 0;
+}
+
+/*
+ * If we fall back to this probe, that means the latency is small and counter
+ * has reached SAMPLE_TIMES: this sample also stands for the skipped ones.
+ */
+sched::resume:on-cpu
+/self->lt_timestamp != 0/
+{
+	this->cause = self->lt_stackp > 0 ?
+	    self->lt_cause[self->lt_stackp - 1] : "";
+	this->priority = self->lt_stackp > 0 ?
+	    self->lt_prio[self->lt_stackp - 1] : 0;
+
+	/* Need +1 because lt_counter has not been updated in this cycle. */
+	@lt_call_count[pid, tid, stack(), this->cause,
+	    this->priority] = sum(self->lt_counter + 1);
+	@lt_call_sum[pid, tid, stack(), this->cause,
+	    this->priority] = sum((self->lt_counter + 1) * self->lt_timestamp);
+	@lt_call_max[pid, tid, stack(), this->cause,
+	    this->priority] = max(self->lt_timestamp);
+
+	self->lt_timestamp = 0;
+	self->lt_counter = 0;
+}
+
+#endif /* !defined(ENABLE_LOW_OVERHEAD) */
+
+#define	TRANSLATE(entryprobe, returnprobe, cause, priority)		\
+entryprobe								\
+TRACE_FILTER_COND(self->lt_stackp == 0 ||				\
+    (self->lt_stackp < MAX_TAG &&					\
+    self->lt_prio[self->lt_stackp - 1] <= priority) )			\
+{									\
+	self->lt_prio[self->lt_stackp] = priority;			\
+	self->lt_cause[self->lt_stackp] = cause;			\
+	++self->lt_stackp;						\
+}									\
+returnprobe								\
+TRACE_FILTER_COND(self->lt_stackp > 0 &&				\
+    self->lt_cause[self->lt_stackp - 1] == cause)			\
+{									\
+	--self->lt_stackp;						\
+	self->lt_cause[self->lt_stackp] = NULL;				\
+}
+
+/*
+ * TRANSLATE() pushes (cause, priority) at the entry probe unless the tag
+ * stack is full or its top has a higher priority, and pops the tag at the
+ * return probe if that cause is still on top. Syscalls have a priority of
+ * 10, to make sure that latency is traced to one of the syscalls only if
+ * nothing else matches. This instance lives here because it uses the
+ * "probefunc" variable instead of a constant string.
+ */
+TRANSLATE(syscall:::entry, syscall:::return, probefunc, 10)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/latencytop/common/latencytop.h	Mon Sep 28 13:53:34 2009 -0700
@@ -0,0 +1,269 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2008-2009, Intel Corporation.
+ * All Rights Reserved.
+ */
+
+#ifndef _LATENCYTOP_H
+#define	_LATENCYTOP_H
+
+#include <sys/types.h>
+#include <stdio.h>	/* FILE, used by lt_table_append_trans() */
+#include <glib.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * Without this lint seems to be confused by glib header file.
+ */
+#ifdef __lint
+#undef g_assert
+#define	g_assert(x)	((void)(x))
+#undef TRUE
+#define	TRUE		1
+#endif
+
+/*
+ * We define our own conversions in order to avoid compiler warnings.
+ */
+#define	LT_INT_TO_POINTER(a)	((void *)(unsigned long)(a))
+
+#define	TITLE			"LatencyTOP for OpenSolaris, version 1.0"
+#define	COPYRIGHT		"Copyright (c) 2008-2009, Intel Corporation."
+#define	DEFAULT_KLOG_FILE	"/var/log/latencytop.log"
+
+#define	INVALID_PID		(~0)
+#define	INVALID_TID		(~0)
+#define	PID_SYS_GLOBAL		INVALID_PID
+#define	INVALID_CAUSE		0
+#define	HIGHER_PRIORITY(a, b)	((a) > (b))
+
+#ifdef EMBED_CONFIGS
+/*
+ * LatencyTOP configuration is embedded in the binary.
+ * Array will be generated by elfwrap.
+ */
+extern char latencytop_d_start;
+extern char latencytop_d_end;
+extern char latencytop_trans_start;
+extern char latencytop_trans_end;
+#else
+/*
+ * LatencyTOP configuration is provided externally by user.
+ */
+#define	DEFAULT_CONFIG_NAME	"./latencytop.trans"
+#define	DEFAULT_D_SCRIPT_NAME   "./latencytop.d"
+#endif
+
+typedef enum {		/* kind of value passed to lt_stat_update*() */
+	LT_STAT_COUNT,
+	LT_STAT_MAX,
+	LT_STAT_SUM
+} lt_stat_type_t;
+
+#define	LT_KLOG_LEVEL_NONE	0	/* Log nothing */
+#define	LT_KLOG_LEVEL_UNMAPPED	1	/* Log only stacks not mapped */
+#define	LT_KLOG_LEVEL_MAPPED	2	/* Log only stacks mapped */
+#define	LT_KLOG_LEVEL_ALL	3	/* Log all stacks, mapped or not */
+
+typedef enum {
+	LT_LEVEL_GLOBAL,	/* System wide statistics */
+	LT_LEVEL_PROCESS,	/* Per-process statistics */
+	LT_LEVEL_THREAD		/* Per-thread statistics */
+} lt_stat_level_t;
+
+typedef enum {		/* order requested from lt_stat_list_create() */
+	LT_SORT_TOTAL,
+	LT_SORT_MAX,
+	LT_SORT_AVG,
+	LT_SORT_COUNT
+} lt_sort_t;
+
+typedef enum {		/* field selector for lt_get_proc_field() */
+	LT_FIELD_FNAME,
+	LT_FIELD_PSARGS
+} lt_field_t;
+
+typedef enum {
+	LT_LIST_CAUSE,		/* List latency by causes (default) */
+	LT_LIST_SPECIALS,	/* List only "special" causes */
+	LT_LIST_SOBJ		/* List synchronization objects */
+} lt_list_type_t;
+
+/*
+ * Accumulated statistics of one entry: count, total and maximum.
+ */
+typedef struct {
+	uint64_t lt_s_count;
+	uint64_t lt_s_total;
+	uint64_t lt_s_max;
+} lt_stat_data_t;
+
+/*
+ * Data structure that stores statistics along with the name.
+ */
+typedef struct {
+	enum {
+		STAT_CAUSE,
+		STAT_SOBJ
+	} lt_se_type;			/* selects the lt_se_tsdata member */
+	const char *lt_se_string;	/* name of the entry */
+	lt_stat_data_t lt_se_data;
+	union {
+		struct {
+			int lt_se_c_id;
+			int lt_se_c_flags;	/* CAUSE_FLAG_* bits */
+		} lt_se_t_cause;		/* for STAT_CAUSE */
+		struct {
+			int lt_se_s_id;
+		} lt_se_t_sobj;			/* for STAT_SOBJ */
+	} lt_se_tsdata;	/* type specific data */
+} lt_stat_entry_t;
+
+typedef struct {
+	int lt_cfg_enable_filter;
+	int lt_cfg_trace_sched;
+	int lt_cfg_trace_syncobj;
+	int lt_cfg_low_overhead_mode;
+	int lt_cfg_snap_interval;	/* in seconds */
+	char *lt_cfg_config_name;	/* may be NULL; free()d by main() */
+} lt_config_t;
+
+extern lt_config_t g_config;	/* The global settings */
+
+/*
+ * Causes can be disabled through the configuration file.
+ * The D script continues to capture a disabled cause, but LatencyTOP does
+ * not count it.
+ */
+#define	CAUSE_FLAG_DISABLED		1
+/*
+ * Causes with this flag are neither shown nor counted as part of the
+ * summary in the "kstack window".
+ */
+#define	CAUSE_FLAG_HIDE_IN_SUMMARY	2
+/*
+ * This is generated from D script (named cause), and is "special".
+ */
+#define	CAUSE_FLAG_SPECIAL		4
+#define	CAUSE_ALL_FLAGS			0xffffffff
+
+/*
+ * These functions collect statistics using DTrace.
+ */
+extern int lt_dtrace_init(void);
+extern int lt_dtrace_work(int);
+extern int lt_dtrace_collect(void);
+extern void lt_dtrace_deinit(void);
+
+/*
+ * These functions maintain configuration, e.g. symbol to cause mapping.
+ */
+extern int lt_table_init(void);
+extern int lt_table_cause_from_stack(const char *, int *, int *);
+extern const char *lt_table_get_cause_name(int);
+extern int lt_table_get_cause_flag(int, int);
+extern int lt_table_cause_from_name(char *, int, int);
+extern int lt_table_append_trans(FILE *fp);
+extern void lt_table_deinit(void);
+
+/*
+ * These functions update the statistics of all causes of latency, as
+ * collected from DTrace.
+ */
+extern void lt_stat_update(pid_t, id_t, char *, char *, unsigned int,
+    lt_stat_type_t, uint64_t);
+extern void lt_stat_update_cause(pid_t, id_t, int, lt_stat_type_t, uint64_t);
+extern void lt_stat_update_sobj(pid_t, id_t, int, unsigned long long,
+    lt_stat_type_t, uint64_t);
+extern void lt_stat_clear_all(void);
+extern void lt_stat_free_all(void);
+
+/*
+ * These functions produce lists for display panes.
+ * Note: after a call to lt_stat_update_*, the old lists will become invalid.
+ */
+extern void *lt_stat_list_create(lt_list_type_t, lt_stat_level_t,
+    pid_t, id_t, int, lt_sort_t);
+extern int lt_stat_list_has_item(void *, int);
+extern const char *lt_stat_list_get_reason(void *, int);
+extern uint64_t lt_stat_list_get_max(void *, int);
+extern uint64_t lt_stat_list_get_sum(void *, int);
+extern uint64_t lt_stat_list_get_count(void *, int);
+extern uint64_t lt_stat_list_get_gtotal(void *);
+extern void lt_stat_list_free(void *);
+
+/*
+ * These functions produce the process list and the thread list.
+ */
+extern int lt_stat_proc_list_create(pid_t **, id_t **);
+extern void lt_stat_proc_list_free(pid_t *, id_t *);
+extern const char *lt_stat_proc_get_name(pid_t);
+extern int lt_stat_proc_get_nthreads(pid_t);
+
+/*
+ * These functions use ncurses to create console-based display.
+ */
+extern void lt_display_init(void);
+extern int lt_display_loop(int);
+extern void lt_display_error(const char *, ...);
+extern void lt_display_deinit(void);
+
+/*
+ * Write statistics to log file - useful for debugging and offline analysis.
+ */
+extern void lt_klog_init(void);
+extern void lt_klog_deinit(void);
+extern int lt_klog_set_log_file(const char *);
+extern int lt_klog_set_log_level(int);
+extern void lt_klog_write(void);
+extern void lt_klog_log(int, pid_t, char *, lt_stat_type_t,
+    uint64_t);
+
+/*
+ * Utility functions.
+ */
+extern uint64_t lt_millisecond(void);
+extern void *lt_malloc(size_t);
+extern void *lt_zalloc(size_t);
+extern char *lt_strdup(const char *);
+extern void lt_check_null(void *);
+extern void lt_time_str(char *, int);
+extern char *lt_get_proc_field(pid_t, lt_field_t);
+extern void lt_update_stat_value(lt_stat_data_t *, lt_stat_type_t, uint64_t);
+extern int lt_sort_by_total_desc(lt_stat_entry_t *, lt_stat_entry_t *);
+extern int lt_sort_by_max_desc(lt_stat_entry_t *, lt_stat_entry_t *);
+extern int lt_sort_by_count_desc(lt_stat_entry_t *, lt_stat_entry_t *);
+extern int lt_sort_by_avg_desc(lt_stat_entry_t *, lt_stat_entry_t *);
+extern void lt_gpipe_init(void);
+extern void lt_gpipe_deinit(void);
+extern void lt_gpipe_break(const char *);
+extern int lt_gpipe_readfd(void);
+extern int lt_file_exist(const char *);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _LATENCYTOP_H */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/latencytop/common/latencytop.trans	Mon Sep 28 13:53:34 2009 -0700
@@ -0,0 +1,44 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2008-2009, Intel Corporation.
+# All Rights Reserved.
+#
+# LatencyTOP 1.0 configuration
+#
+
+#
+# Format:
+# D <priority>    <probe 1>     <probe 2>  ...   <probe n>     <Cause>
+# ; <special_command>  <option value>
+# # comments
+#
+# D: We use dtrace probes to tag latency to different causes. The entry probe
+# pushes the cause and the priority to a thread local stack, and the return
+# probe pops them. These probes are transformed into:
+#     TRANSLATE(entryprobe, returnprobe, cause, priority)
+# and are appended to the embedded D script.
+#
+# <special_command>:
+# disable_category <category name> : do not count and display <category name>
+#
+
+# ZFS
+D 60    sdt:zfs:zil_commit_writer:zil-cw1  sdt:zfs:zil_commit_writer:zil-cw4  ZFS ZIL writer I/O
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/latencytop/common/stat.c	Mon Sep 28 13:53:34 2009 -0700
@@ -0,0 +1,1050 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2008-2009, Intel Corporation.
+ * All Rights Reserved.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <memory.h>
+#include <string.h>
+#include <limits.h>
+#include <sys/stat.h>
+
+#include "latencytop.h"
+
+/* Statistics for each process/thread. */
+typedef struct _lt_stat_collection lt_stat_collection_t;
+typedef gboolean (*check_child_func_t) (gpointer key,
+    lt_stat_collection_t *stat, void *user);
+
+typedef struct {
+	lt_stat_entry_t lt_grp_summary;
+	/* cause_id -> stat entry; created on first update */
+	GHashTable *lt_grp_cidlist;
+} lt_datagroup_t;
+
+#define	NGROUPS			2	/* size of lt_sc_groups[] */
+#define	GROUP_CAUSE		0
+#define	GROUP_SOBJ		1
+
+/*
+ * A data collection hierarchy involving three entities - system, process
+ * and thread. The hierarchical relationship is as follows:
+ *
+ *		1 system -> 1 or more processes -> 1 or more threads
+ */
+struct _lt_stat_collection {
+	lt_stat_level_t lt_sc_level;
+	unsigned int lt_sc_id;		/* pid or tid, depending on level */
+	char *lt_sc_name;		/* released by free_stat() */
+	lt_datagroup_t lt_sc_groups[NGROUPS];
+	/*
+	 * The following fields: lt_sc_parent, lt_sc_children and
+	 * lt_sc_check_child_func maintain the tree structure.
+	 */
+	lt_stat_collection_t *lt_sc_parent;		/* Parent node */
+	GHashTable *lt_sc_children;	/* pid/tid -> lt_stat_collection_t */
+	check_child_func_t lt_sc_check_child_func; /* Release dead children */
+};
+
+/* Internal data structure backing a stat list (see lt_stat_list_*()). */
+typedef struct _lt_stat_list lt_stat_list_t;
+typedef void (*free_list_func_t)(lt_stat_list_t *);
+struct _lt_stat_list {
+	int lt_sl_entry_count;
+	lt_stat_entry_t **lt_sl_entries;
+	uint64_t lt_sl_gtotal;
+	free_list_func_t lt_sl_free_func;
+};
+
+/* Root of the collection hierarchy: system level statistics */
+static lt_stat_collection_t *stat_system = NULL;
+
+/*
+ * Data structure to hold synchronization objects.
+ * We don't use the normal "cause table" because this needs to be cleared
+ * every time we refresh in order to make sure that stale synchronization
+ * objects don't consume memory.
+ */
+typedef struct {
+	int lt_soi_type;		/* index of the type name */
+	unsigned long long lt_soi_addr;	/* low 32 bits are the hash */
+} lt_sobj_id_t;
+
+typedef struct {
+	lt_sobj_id_t lt_so_oid;	/* also the key in sobj_table */
+	int lt_so_cause_id;	/* only unique within GROUP_SOBJ */
+	char lt_so_string[32];	/* Enough to hold "%s: 0x%llX" */
+} lt_sobj_t;
+
+static GHashTable *sobj_table = NULL;	/* lt_sobj_id_t -> lt_sobj_t */
+static int sobj_table_len = 0;		/* last cause_id handed out */
+
+/*
+ * Hash a synchronization object by the low 32 bits of its address; the
+ * type of the object does not take part in the hash.
+ */
+static guint
+sobj_id_hash(lt_sobj_id_t *id)
+{
+	g_assert(id != NULL);
+	return ((guint)(id->lt_soi_addr & 0xFFFFFFFFULL));
+}
+
+/*
+ * Two synchronization objects are the same if both type and address match.
+ */
+static gboolean
+sobj_id_equal(lt_sobj_id_t *a, lt_sobj_id_t *b)
+{
+	g_assert(a != NULL && b != NULL);
+	return ((a->lt_soi_addr != b->lt_soi_addr ||
+	    a->lt_soi_type != b->lt_soi_type) ? FALSE : TRUE);
+}
+
+/*
+ * Look up a synchronization object, creating its record (and the table
+ * itself) on first use. Returns NULL if the type of the object is unknown.
+ * Note that the cause_id of the record is only unique in GROUP_SOBJ, and
+ * changes after a refresh.
+ */
+static lt_sobj_t *
+lookup_sobj(lt_sobj_id_t *id)
+{
+	/* Names of the known object types, indexed by lt_soi_type. */
+	static const char * const stype_str[] = {
+		"None",
+		"Mutex",
+		"RWLock",
+		"CV",
+		"Sema",
+		"User",
+		"User_PI",
+		"Shuttle"
+	};
+	const int stype_str_len = sizeof (stype_str) / sizeof (stype_str[0]);
+	lt_sobj_t *ret = NULL;
+	g_assert(id != NULL);
+
+	if (id->lt_soi_type < 0 || id->lt_soi_type >= stype_str_len) {
+		return (NULL);
+	}
+
+	if (sobj_table != NULL) {
+		ret = (lt_sobj_t *)g_hash_table_lookup(sobj_table, id);
+	} else {
+		sobj_table = g_hash_table_new_full(
+		    (GHashFunc)sobj_id_hash, (GEqualFunc)sobj_id_equal,
+		    NULL, (GDestroyNotify)free);
+		lt_check_null(sobj_table);
+	}
+
+	if (ret == NULL) {
+		ret = (lt_sobj_t *)lt_zalloc(sizeof (lt_sobj_t));
+		ret->lt_so_cause_id = ++sobj_table_len;
+		(void) snprintf(ret->lt_so_string, sizeof (ret->lt_so_string),
+		    "%s: 0x%llX", stype_str[id->lt_soi_type], id->lt_soi_addr);
+		ret->lt_so_oid = *id;	/* the key lives inside the record */
+
+		g_hash_table_insert(sobj_table, &ret->lt_so_oid, ret);
+	}
+
+	return (ret);
+}
+
+/*
+ * GHRFunc for process nodes: TRUE (remove) when /proc/pid no longer exists.
+ */
+/* ARGSUSED */
+static gboolean
+check_process(gpointer key, lt_stat_collection_t *stat, void *user)
+{
+	char path[PATH_MAX];
+
+	(void) snprintf(path, sizeof (path), "/proc/%u", stat->lt_sc_id);
+	return (!lt_file_exist(path));
+}
+
+/*
+ * GHRFunc for thread nodes: TRUE (remove) when /proc/pid/lwp/tid is gone.
+ */
+/* ARGSUSED */
+static gboolean
+check_thread(gpointer key, lt_stat_collection_t *stat, void *user)
+{
+	char path[PATH_MAX];
+
+	g_assert(stat->lt_sc_parent != NULL);
+	g_assert(stat->lt_sc_parent->lt_sc_level == LT_LEVEL_PROCESS);
+
+	(void) snprintf(path, sizeof (path), "/proc/%u/lwp/%u",
+	    stat->lt_sc_parent->lt_sc_id, stat->lt_sc_id);
+	return (!lt_file_exist(path));
+}
+
+/*
+ * Release a stat node together with the hash tables and the name it owns.
+ */
+static void
+free_stat(lt_stat_collection_t *stat)
+{
+	int grp;
+	GHashTable *table;
+
+	if (stat == NULL) {
+		return;
+	}
+
+	/* Tables of entries, one per group; they free their entries. */
+	for (grp = 0; grp < NGROUPS; ++grp) {
+		table = stat->lt_sc_groups[grp].lt_grp_cidlist;
+		if (table != NULL) {
+			g_hash_table_destroy(table);
+		}
+	}
+
+	/* Destroying the table of children frees the children as well. */
+	if (stat->lt_sc_children != NULL) {
+		g_hash_table_destroy(stat->lt_sc_children);
+	}
+
+	free(stat->lt_sc_name);		/* free(NULL) is a no-op */
+	free(stat);
+}
+
+/*
+ * Reset a stat node; dead children are removed, the others reset as well.
+ */
+/* ARGSUSED */
+static void
+clear_stat(gpointer key, lt_stat_collection_t *stat, void *user)
+{
+	int idx;
+	lt_datagroup_t *grp;
+	lt_stat_data_t *sum;
+
+	g_assert(stat != NULL);
+
+	for (idx = 0; idx < NGROUPS; ++idx) {
+		grp = &stat->lt_sc_groups[idx];
+		sum = &grp->lt_grp_summary.lt_se_data;
+		if (grp->lt_grp_cidlist != NULL) {
+			g_hash_table_destroy(grp->lt_grp_cidlist);
+			grp->lt_grp_cidlist = NULL;
+		}
+		sum->lt_s_count = sum->lt_s_total = sum->lt_s_max = 0;
+	}
+
+	if (stat->lt_sc_children == NULL) {
+		return;
+	}
+	g_hash_table_foreach_remove(stat->lt_sc_children,
+	    (GHRFunc)stat->lt_sc_check_child_func, NULL);
+	g_hash_table_foreach(stat->lt_sc_children, (GHFunc)clear_stat, NULL);
+}
+
+/*
+ * Add value to the cause_id entry of one group (and to the group summary),
+ * then repeat for each parent up the hierarchy until the root is reached.
+ */
+static void
+update_stat_entry(lt_stat_collection_t *stat, int cause_id,
+		lt_stat_type_t type, uint64_t value,
+		const char *string, int group_to_use)
+{
+	lt_stat_entry_t *entry = NULL;
+	lt_datagroup_t *group;
+
+	if (group_to_use < 0 || group_to_use >= NGROUPS) {
+		return;
+	}
+
+	group = &(stat->lt_sc_groups[group_to_use]);
+
+	if (group->lt_grp_cidlist != NULL) {
+		entry = (lt_stat_entry_t *)g_hash_table_lookup(
+		    group->lt_grp_cidlist, LT_INT_TO_POINTER(cause_id));
+	} else   {
+		group->lt_grp_cidlist = g_hash_table_new_full(
+		    g_direct_hash, g_direct_equal,
+		    NULL, (GDestroyNotify)free);
+		lt_check_null(group->lt_grp_cidlist);
+	}
+
+	if (entry == NULL) {
+		entry = (lt_stat_entry_t *)lt_zalloc(sizeof (lt_stat_entry_t));
+		entry->lt_se_string = string;
+
+		switch (group_to_use) {
+		case GROUP_CAUSE:
+			entry->lt_se_type = STAT_CAUSE;
+			entry->lt_se_tsdata.lt_se_t_cause.lt_se_c_id = cause_id;
+			entry->lt_se_tsdata.lt_se_t_cause.lt_se_c_flags =
+			    lt_table_get_cause_flag(cause_id, CAUSE_ALL_FLAGS);
+
+			/* skip the leading '#' of the name */
+			if ((entry->lt_se_tsdata.lt_se_t_cause.lt_se_c_flags
+			    & CAUSE_FLAG_HIDE_IN_SUMMARY) != 0) {
+				++entry->lt_se_string;
+			}
+
+			break;
+		case GROUP_SOBJ:
+			entry->lt_se_type = STAT_SOBJ;
+			entry->lt_se_tsdata.lt_se_t_sobj.lt_se_s_id = cause_id;
+			break;
+		}
+
+		g_hash_table_insert(group->lt_grp_cidlist,
+		    LT_INT_TO_POINTER(cause_id), entry);
+	}
+
+	lt_update_stat_value(&entry->lt_se_data, type, value);
+	/* Hidden causes are left out of the group summary. */
+	if (group_to_use == GROUP_SOBJ ||
+	    (entry->lt_se_tsdata.lt_se_t_cause.lt_se_c_flags &
+	    CAUSE_FLAG_HIDE_IN_SUMMARY) == 0) {
+		lt_update_stat_value(&group->lt_grp_summary.lt_se_data, type,
+		    value);
+	}
+
+	if (stat->lt_sc_parent != NULL) {
+		update_stat_entry(stat->lt_sc_parent, cause_id, type, value,
+		    string, group_to_use);
+	}
+}
+
+/*
+ * Identify the cause of latency from the given stack trace: among the
+ * symbols that map to a cause, the one with the highest priority wins.
+ * Results go to *cause_id (INVALID_CAUSE if none) and *cause_priority.
+ */
+static void
+find_cause(char *stack, int *cause_id, int *cause_priority)
+{
+	int best_cause = INVALID_CAUSE;
+	int best_prio = 0;
+	char *sym;
+	char *end;
+
+	g_assert(cause_id != NULL);
+	g_assert(cause_priority != NULL);
+
+	for (sym = stack; sym != NULL; sym = (end != NULL) ? end + 1 : NULL) {
+		int cand_cause;
+		int cand_prio;
+		int hit;
+
+		/* Terminate the current symbol in place for the lookup. */
+		end = strchr(sym, ' ');
+		if (end != NULL) {
+			*end = '\0';
+		}
+
+		hit = lt_table_cause_from_stack(sym, &cand_cause, &cand_prio);
+
+		/* Restore the separator so that the stack stays intact. */
+		if (end != NULL) {
+			*end = ' ';
+		}
+
+		if (hit && (best_cause == INVALID_CAUSE ||
+		    HIGHER_PRIORITY(cand_prio, best_prio))) {
+			best_cause = cand_cause;
+			best_prio = cand_prio;
+		}
+	}
+
+	*cause_id = best_cause;
+	*cause_priority = best_prio;
+}
+
+/*
+ * Allocate a collection node and, if a parent is given, register it in the
+ * parent's table of children under its id. The node keeps the name pointer.
+ */
+static lt_stat_collection_t *
+new_collection(lt_stat_level_t level, unsigned int id, char *name,
+    lt_stat_collection_t *parent, check_child_func_t check_child_func)
+{
+	int grp;
+	lt_stat_collection_t *node;
+
+	node = (lt_stat_collection_t *)lt_zalloc(sizeof (*node));
+	node->lt_sc_level = level;
+	node->lt_sc_id = id;
+	node->lt_sc_name = name;
+	node->lt_sc_check_child_func = check_child_func;
+
+	/* Every group summary is labelled with the name of the node. */
+	for (grp = 0; grp < NGROUPS; ++grp) {
+		node->lt_sc_groups[grp].lt_grp_summary.lt_se_string = name;
+	}
+
+	if (parent == NULL) {
+		return (node);
+	}
+
+	node->lt_sc_parent = parent;
+	/* The table of children is created lazily; it owns its nodes. */
+	if (parent->lt_sc_children == NULL) {
+		parent->lt_sc_children = g_hash_table_new_full(g_direct_hash,
+		    g_direct_equal, NULL, (GDestroyNotify)free_stat);
+		lt_check_null(parent->lt_sc_children);
+	}
+
+	g_hash_table_insert(parent->lt_sc_children,
+	    LT_INT_TO_POINTER((int)id), node);
+
+	return (node);
+}
+
+/*
+ * Find the "leaf" (thread level collection) in the collection hierarchy for
+ * the given pid and tid, creating the system, process and thread collections
+ * on demand.
+ *
+ * Returns NULL when a new process collection is needed but the executable
+ * name of the process cannot be obtained.
+ */
+static lt_stat_collection_t *
+get_stat_c(pid_t pid, id_t tid)
+{
+	lt_stat_collection_t *stat_p = NULL;
+	lt_stat_collection_t *stat_t = NULL;
+
+	if (stat_system == NULL) {
+		stat_system = new_collection(LT_LEVEL_GLOBAL,
+		    PID_SYS_GLOBAL, lt_strdup("SYSTEM"), NULL, check_process);
+	} else if (stat_system->lt_sc_children != NULL) {
+		stat_p = (lt_stat_collection_t *)
+		    g_hash_table_lookup(stat_system->lt_sc_children,
+		    LT_INT_TO_POINTER(pid));
+	}
+
+	if (stat_p == NULL) {
+		char *fname;
+		fname = lt_get_proc_field(pid, LT_FIELD_FNAME);
+
+		if (fname == NULL) {
+			/*
+			 * we could not get the executable name of the
+			 * process; the process is probably already dead.
+			 */
+			return (NULL);
+		}
+
+		/* The collection keeps the fname pointer as its name. */
+		stat_p = new_collection(LT_LEVEL_PROCESS,
+		    (unsigned int)pid, fname, stat_system, check_thread);
+	} else if (stat_p->lt_sc_children != NULL) {
+		stat_t = (lt_stat_collection_t *)
+		    g_hash_table_lookup(stat_p->lt_sc_children,
+		    LT_INT_TO_POINTER(tid));
+	}
+
+	if (stat_t == NULL) {
+		/*
+		 * "Thread " followed by a sign and up to ten digits needs
+		 * 19 bytes including the terminator; a 16 byte buffer would
+		 * silently truncate the name for large thread ids.
+		 */
+		const int tname_size = 32;
+		char *tname;
+
+		tname = (char *)lt_zalloc(tname_size);
+		(void) snprintf(tname, tname_size, "Thread %d", (int)tid);
+
+		stat_t = new_collection(LT_LEVEL_THREAD,
+		    (unsigned int)tid, tname, stat_p, NULL);
+	}
+
+	return (stat_t);
+}
+
+/*
+ * Add one sample (type, value) for an already resolved cause_id to the
+ * statistics of the given thread.
+ *
+ * The sample is dropped when cause_id is negative, value is zero, the cause
+ * carries CAUSE_FLAG_DISABLED, or no collection can be found or created for
+ * pid/tid.
+ */
+void
+lt_stat_update_cause(pid_t pid, id_t tid, int cause_id, lt_stat_type_t type,
+    uint64_t value)
+{
+	lt_stat_collection_t *leaf;
+	const char *cause_name;
+
+	/* Nothing to record for an invalid id or an empty sample. */
+	if (value == 0 || cause_id < 0) {
+		return;
+	}
+
+	/* Disabled causes are ignored altogether. */
+	if (lt_table_get_cause_flag(cause_id, CAUSE_FLAG_DISABLED) != 0) {
+		return;
+	}
+
+	leaf = get_stat_c(pid, tid);
+
+	/* A NULL leaf means the process could not be looked up any more. */
+	if (leaf != NULL) {
+		cause_name = lt_table_get_cause_name(cause_id);
+		update_stat_entry(leaf, cause_id, type, value, cause_name,
+		    GROUP_CAUSE);
+	}
+}
+
+/*
+ * Record one sample that is described by a stack trace and an optional tag.
+ *
+ * The stack is mapped to a cause with find_cause(). When tag_priority is
+ * non-zero the tag is looked up as a cause name as well; a tag that is not a
+ * known cause is registered as "Syscall: <tag>". The tag's cause is used when
+ * tag_priority is greater than the stack's priority, otherwise the stack's
+ * cause is used. If the chosen cause is INVALID_CAUSE, the raw stack text is
+ * registered as a new cause carrying CAUSE_FLAG_SPECIAL. The sample is then
+ * handed to lt_stat_update_cause().
+ */
+void
+lt_stat_update(pid_t pid, id_t tid, char *stack, char *tag,
+    unsigned int tag_priority, lt_stat_type_t type, uint64_t value)
+{
+	int from_stack = INVALID_CAUSE;
+	int from_tag = INVALID_CAUSE;
+	int stack_priority = 0;
+	int chosen;
+
+	if (value == 0) {
+		return;
+	}
+
+	find_cause(stack, &from_stack, &stack_priority);
+
+	if (tag_priority != 0) {
+		from_tag = lt_table_cause_from_name(tag, 0, 0);
+
+		if (from_tag == INVALID_CAUSE) {
+			/* This must be a syscall tag */
+			char syscall_name[64];
+
+			(void) snprintf(syscall_name, sizeof (syscall_name),
+			    "Syscall: %s", tag);
+			from_tag = lt_table_cause_from_name(syscall_name, 1, 0);
+		}
+	}
+
+	if (tag_priority > stack_priority) {
+		chosen = from_tag;
+	} else {
+		chosen = from_stack;
+	}
+
+	if (chosen != INVALID_CAUSE) {
+		lt_klog_log(LT_KLOG_LEVEL_MAPPED, pid, stack, type, value);
+	} else {
+		/*
+		 * We got an unmapped stack. Set SPECIAL flag to display it
+		 * in pane 2. This makes it easier to find the cause.
+		 */
+		chosen = lt_table_cause_from_name(stack, 1,
+		    CAUSE_FLAG_SPECIAL);
+		lt_klog_log(LT_KLOG_LEVEL_UNMAPPED, pid, stack, type, value);
+	}
+
+	lt_stat_update_cause(pid, tid, chosen, type, value);
+}
+
+/*
+ * Reset the collected statistics: clear_stat() is run on the global
+ * collection (if there is one) and the synchronization object table is
+ * destroyed. The global collection pointer itself is kept, so that it can
+ * hold new data immediately following.
+ */
+void
+lt_stat_clear_all(void)
+{
+	if (stat_system != NULL) {
+		clear_stat(NULL, stat_system, NULL);
+	}
+
+	if (sobj_table == NULL) {
+		return;
+	}
+
+	g_hash_table_destroy(sobj_table);
+	sobj_table = NULL;
+}
+
+/*
+ * Clean up function: releases the global collection with free_stat() and
+ * destroys the synchronization object table. Both pointers are reset to
+ * NULL, so calling this function again is harmless.
+ */
+void
+lt_stat_free_all(void)
+{
+	lt_stat_collection_t *root = stat_system;
+
+	if (root != NULL) {
+		free_stat(root);
+		stat_system = NULL;
+	}
+
+	if (sobj_table == NULL) {
+		return;
+	}
+
+	g_hash_table_destroy(sobj_table);
+	sobj_table = NULL;
+}
+
+/*
+ * Get top N causes of latency for the system, a process or a thread.
+ * Returns a handle to a stat_list; call lt_stat_list_free() after use to
+ * clean up.
+ *
+ *	list_type	which entries to list: LT_LIST_SOBJ reads the
+ *			synchronization object group, everything else reads
+ *			the cause group. LT_LIST_CAUSE skips causes flagged
+ *			CAUSE_FLAG_HIDE_IN_SUMMARY, LT_LIST_SPECIALS keeps
+ *			only causes flagged CAUSE_FLAG_SPECIAL.
+ *	level		LT_LEVEL_GLOBAL uses the global collection (pid and
+ *			tid are ignored); otherwise the process "pid" is
+ *			looked up and, for LT_LEVEL_THREAD, its thread "tid".
+ *	count		maximum number of entries to return (N). Values
+ *			below zero are treated as zero.
+ *	sort_by		sort order; unknown values fall back to sorting by
+ *			total.
+ *
+ * An empty list is returned when the requested collection or group does not
+ * exist. The returned entries point into the live statistics; they are not
+ * copies.
+ */
+void *
+lt_stat_list_create(lt_list_type_t list_type, lt_stat_level_t level,
+    pid_t pid, id_t tid, int count, lt_sort_t sort_by)
+{
+	GCompareFunc func;
+	GList *list, *walk;
+	lt_stat_collection_t *stat_c = NULL;
+	lt_stat_list_t *ret;
+	lt_datagroup_t *group;
+
+	if (count < 0) {
+		count = 0;
+	}
+
+	if (level == LT_LEVEL_GLOBAL) {
+		/* Use global entry */
+		stat_c = stat_system;
+	} else if (stat_system != NULL && stat_system->lt_sc_children != NULL) {
+		/* Find process entry first */
+		stat_c = (lt_stat_collection_t *)g_hash_table_lookup(
+		    stat_system->lt_sc_children, LT_INT_TO_POINTER(pid));
+
+		if (level == LT_LEVEL_THREAD) {
+			/*
+			 * If thread entry is requested, find it based on
+			 * process entry.
+			 */
+			if (stat_c != NULL && stat_c->lt_sc_children != NULL) {
+				stat_c = (lt_stat_collection_t *)
+				    g_hash_table_lookup(stat_c->lt_sc_children,
+				    LT_INT_TO_POINTER(tid));
+			} else {
+				/*
+				 * Thread entry was not found; set it to NULL,
+				 * so that we can return empty list later.
+				 */
+				stat_c = NULL;
+			}
+		}
+	}
+
+	ret = (lt_stat_list_t *)lt_zalloc(sizeof (lt_stat_list_t));
+
+	/*
+	 * Always ask for at least one slot: a zero byte request may
+	 * legitimately yield NULL, which cannot be told apart from a failed
+	 * allocation.
+	 */
+	ret->lt_sl_entries = (lt_stat_entry_t **)
+	    lt_zalloc((count > 0 ? count : 1) * sizeof (lt_stat_entry_t *));
+
+	if (stat_c == NULL) {
+		/* Empty list */
+		return (ret);
+	}
+
+	if (list_type == LT_LIST_SOBJ) {
+		group = &(stat_c->lt_sc_groups[GROUP_SOBJ]);
+	} else {
+		group = &(stat_c->lt_sc_groups[GROUP_CAUSE]);
+	}
+
+	if (group->lt_grp_cidlist == NULL) {
+		/* Empty list */
+		return (ret);
+	}
+
+	ret->lt_sl_gtotal = group->lt_grp_summary.lt_se_data.lt_s_total;
+
+	list = g_hash_table_get_values(group->lt_grp_cidlist);
+
+	switch (sort_by) {
+	case LT_SORT_MAX:
+		func = (GCompareFunc)lt_sort_by_max_desc;
+		break;
+	case LT_SORT_AVG:
+		func = (GCompareFunc)lt_sort_by_avg_desc;
+		break;
+	case LT_SORT_COUNT:
+		func = (GCompareFunc)lt_sort_by_count_desc;
+		break;
+	case LT_SORT_TOTAL:
+	default:
+		/* Never hand an uninitialized comparator to g_list_sort() */
+		func = (GCompareFunc)lt_sort_by_total_desc;
+		break;
+	}
+	list = g_list_sort(list, func);
+
+	/*
+	 * Stop once "count" entries have been collected. Entries that are
+	 * filtered out below do not use up a slot, so the list is filled
+	 * with the top "count" entries that are actually shown.
+	 */
+	for (walk = list;
+	    walk != NULL && ret->lt_sl_entry_count < count;
+	    walk = g_list_next(walk)) {
+		lt_stat_entry_t *data = (lt_stat_entry_t *)walk->data;
+
+		if (list_type == LT_LIST_CAUSE &&
+		    data->lt_se_type == STAT_CAUSE &&
+		    (data->lt_se_tsdata.lt_se_t_cause.lt_se_c_flags &
+		    CAUSE_FLAG_HIDE_IN_SUMMARY) != 0) {
+			continue;
+		}
+
+		if (list_type == LT_LIST_SPECIALS &&
+		    data->lt_se_type == STAT_CAUSE &&
+		    (data->lt_se_tsdata.lt_se_t_cause.lt_se_c_flags &
+		    CAUSE_FLAG_SPECIAL) == 0) {
+			continue;
+		}
+
+		/* The first entry without any samples ends the list. */
+		if (data->lt_se_data.lt_s_count == 0) {
+			break;
+		}
+
+		ret->lt_sl_entries[ret->lt_sl_entry_count++] = data;
+	}
+
+	g_list_free(list);
+
+	return (ret);
+}
+
+/*
+ * Release a list handle obtained from lt_stat_list_create(). A NULL handle
+ * is accepted and ignored. If the list carries its own lt_sl_free_func, it
+ * is invoked before the entry array and the list itself are freed.
+ */
+void
+lt_stat_list_free(void *ptr)
+{
+	lt_stat_list_t *stat_list = (lt_stat_list_t *)ptr;
+
+	if (stat_list != NULL) {
+		if (stat_list->lt_sl_free_func != NULL) {
+			stat_list->lt_sl_free_func(stat_list);
+		}
+
+		/* free(NULL) is a no-op, so no separate test is needed. */
+		free(stat_list->lt_sl_entries);
+		free(stat_list);
+	}
+}
+
+/*
+ * Tell whether index i refers to an entry of the given list.
+ * Returns 1 if it does, 0 for a NULL list, an index out of range or an
+ * empty slot.
+ */
+int
+lt_stat_list_has_item(void *ptr, int i)
+{
+	lt_stat_list_t *list = (lt_stat_list_t *)ptr;
+
+	if (list != NULL && i >= 0 && i < list->lt_sl_entry_count &&
+	    list->lt_sl_entries[i] != NULL) {
+		return (1);
+	}
+
+	return (0);
+}
+
+/*
+ * Return the display name of item i in the given list, or NULL for a NULL
+ * list, an index out of range or an empty slot. An existing entry is
+ * expected to always have a name (asserted).
+ */
+const char *
+lt_stat_list_get_reason(void *ptr, int i)
+{
+	lt_stat_list_t *list = (lt_stat_list_t *)ptr;
+
+	if (list == NULL || i < 0 || i >= list->lt_sl_entry_count) {
+		return (NULL);
+	}
+
+	if (list->lt_sl_entries[i] == NULL) {
+		return (NULL);
+	}
+
+	g_assert(list->lt_sl_entries[i]->lt_se_string != NULL);
+
+	return (list->lt_sl_entries[i]->lt_se_string);
+}
+
+/*
+ * Return the maximum value recorded for item i in the given list.
+ * Yields 0 for a NULL list, an index out of range or an empty slot.
+ */
+uint64_t
+lt_stat_list_get_max(void *ptr, int i)
+{
+	lt_stat_list_t *list = (lt_stat_list_t *)ptr;
+	lt_stat_entry_t *entry;
+
+	if (list == NULL || i < 0 || i >= list->lt_sl_entry_count) {
+		return (0);
+	}
+
+	entry = list->lt_sl_entries[i];
+
+	return (entry == NULL ? 0 : entry->lt_se_data.lt_s_max);
+}
+
+/*
+ * Return the total value accumulated for item i in the given list.
+ * Yields 0 for a NULL list, an index out of range or an empty slot.
+ */
+uint64_t
+lt_stat_list_get_sum(void *ptr, int i)
+{
+	lt_stat_list_t *list = (lt_stat_list_t *)ptr;
+	lt_stat_entry_t *entry;
+
+	if (list == NULL || i < 0 || i >= list->lt_sl_entry_count) {
+		return (0);
+	}
+
+	entry = list->lt_sl_entries[i];
+
+	return (entry == NULL ? 0 : entry->lt_se_data.lt_s_total);
+}
+
+/*
+ * Return the number of samples recorded for item i in the given list.
+ * Yields 0 for a NULL list, an index out of range or an empty slot.
+ */
+uint64_t
+lt_stat_list_get_count(void *ptr, int i)
+{
+	lt_stat_list_t *list = (lt_stat_list_t *)ptr;
+	lt_stat_entry_t *entry;
+
+	if (list == NULL || i < 0 || i >= list->lt_sl_entry_count) {
+		return (0);
+	}
+
+	entry = list->lt_sl_entries[i];
+
+	return (entry == NULL ? 0 : entry->lt_se_data.lt_s_count);
+}
+
+/*
+ * Return the grand total stored in the list when it was created (the total
+ * of the summary entry of the group it was built from), or 0 for a NULL
+ * list.
+ */
+uint64_t
+lt_stat_list_get_gtotal(void *ptr)
+{
+	lt_stat_list_t *list = (lt_stat_list_t *)ptr;
+
+	return (list != NULL ? list->lt_sl_gtotal : 0);
+}
+
+/*
+ * ============================================================================
+ * Process and thread list.
+ * They share a lot of the static variables that are used for keeping
+ * statistics, hence they are located in this file.
+ */
+
+/*
+ * Helper function, sort by PID/TID in ascending order.
+ * Returns a negative value, zero or a positive value when the id of a is
+ * lower than, equal to or higher than the id of b.
+ *
+ * The ids are compared explicitly rather than subtracted: a difference
+ * squeezed into an int can wrap around and report the wrong order for ids
+ * that are far apart.
+ */
+static int
+sort_id(lt_stat_collection_t *a, lt_stat_collection_t *b)
+{
+	if (a->lt_sc_id < b->lt_sc_id) {
+		return (-1);
+	}
+
+	return (a->lt_sc_id > b->lt_sc_id ? 1 : 0);
+}
+
+/*
+ * Get the current list of processes, sorted by pid. The caller must make
+ * sure that stat_system and its child table exist.
+ *
+ * On return *list points to a newly allocated array of pids, or is NULL when
+ * there are no processes. The number of pids is returned. Call
+ * lt_stat_proc_list_free after use to clean up.
+ */
+static int
+plist_create(pid_t ** list)
+{
+	GList *pid_list, *walk;
+	int ret, count;
+
+	ret = g_hash_table_size(stat_system->lt_sc_children);
+
+	if (ret == 0) {
+		/*
+		 * Do not ask for a zero byte array: malloc(0) may return
+		 * NULL, which would be mistaken for an allocation failure.
+		 */
+		*list = NULL;
+		return (0);
+	}
+
+	*list = (pid_t *)lt_malloc(sizeof (pid_t) * ret);
+
+	pid_list = g_hash_table_get_values(stat_system->lt_sc_children);
+	pid_list = g_list_sort(pid_list, (GCompareFunc)sort_id);
+
+	for (walk = pid_list, count = 0;
+	    walk != NULL && count < ret;
+	    walk = g_list_next(walk), ++count) {
+		(*list)[count] = (int)
+		    ((lt_stat_collection_t *)(walk->data))->lt_sc_id;
+	}
+
+	g_list_free(pid_list);
+
+	return (ret);
+}
+
+/*
+ * GHFunc style callback: add the number of children (threads) recorded for
+ * the process collection stat_c to *ret. A process without a child table
+ * contributes nothing. The key is not used.
+ */
+/* ARGSUSED */
+static void
+count_threads(gpointer key, lt_stat_collection_t *stat_c, int *ret)
+{
+	GHashTable *threads;
+
+	g_assert(ret != NULL);
+
+	threads = stat_c->lt_sc_children;
+
+	if (threads == NULL) {
+		return;
+	}
+
+	*ret += g_hash_table_size(threads);
+}
+
+/*
+ * Get current list of processes and threads. The caller must make sure that
+ * stat_system and its child table exist.
+ *
+ * On return *plist and *tlist are parallel arrays: element n of *tlist is a
+ * thread id and element n of *plist is the pid that thread belongs to. The
+ * arrays are ordered by pid and then by tid. Both are NULL when no thread is
+ * known. The number of elements is returned.
+ * Call lt_stat_proc_list_free after use to clean up.
+ */
+static int
+tlist_create(pid_t ** plist, id_t ** tlist)
+{
+	GList *pid_list, *walk_p;
+	GList *tid_list, *walk_t;
+	int ret = 0;
+	int count = 0;
+
+	g_hash_table_foreach(stat_system->lt_sc_children,
+	    (GHFunc)count_threads, &ret);
+
+	if (ret == 0) {
+		/*
+		 * Do not ask for zero byte arrays: malloc(0) may return
+		 * NULL, which would be mistaken for an allocation failure.
+		 */
+		*plist = NULL;
+		*tlist = NULL;
+		return (0);
+	}
+
+	*plist = (pid_t *)lt_malloc(sizeof (pid_t) * ret);
+	*tlist = (id_t *)lt_malloc(sizeof (id_t) * ret);
+
+	pid_list = g_hash_table_get_values(stat_system->lt_sc_children);
+	pid_list = g_list_sort(pid_list, (GCompareFunc)sort_id);
+
+	for (walk_p = pid_list; walk_p != NULL;
+	    walk_p = g_list_next(walk_p)) {
+		lt_stat_collection_t *stat_p =
+		    (lt_stat_collection_t *)walk_p->data;
+
+		if (stat_p->lt_sc_children == NULL) {
+			continue;
+		}
+
+		tid_list = g_hash_table_get_values(stat_p->lt_sc_children);
+		tid_list = g_list_sort(tid_list, (GCompareFunc)sort_id);
+
+		for (walk_t = tid_list; walk_t != NULL;
+		    walk_t = g_list_next(walk_t)) {
+			lt_stat_collection_t *stat_t =
+			    (lt_stat_collection_t *)walk_t->data;
+
+			(*plist)[count] = (int)stat_p->lt_sc_id;
+			(*tlist)[count] = (int)stat_t->lt_sc_id;
+
+			++count;
+		}
+		g_list_free(tid_list);
+	}
+
+	g_list_free(pid_list);
+
+	/* The arrays were sized from the same tables that were just walked */
+	g_assert(count == ret);
+
+	return (ret);
+}
+
+/*
+ * Build the list of processes that are tracked by LatencyTOP.
+ *
+ * With tlist == NULL only the pid array is produced (one element per
+ * process). With a non-NULL tlist, parallel pid/tid arrays are produced (one
+ * element per thread). Returns the number of elements, 0 (with NULL arrays)
+ * when nothing is tracked yet, or -1 if plist is NULL.
+ */
+int
+lt_stat_proc_list_create(pid_t ** plist, id_t ** tlist)
+{
+	int have_procs;
+
+	if (plist == NULL) {
+		return (-1);
+	}
+
+	have_procs = (stat_system != NULL &&
+	    stat_system->lt_sc_children != NULL);
+
+	if (!have_procs) {
+		*plist = NULL;
+
+		if (tlist != NULL) {
+			*tlist = NULL;
+		}
+
+		return (0);
+	}
+
+	return (tlist != NULL ? tlist_create(plist, tlist) :
+	    plist_create(plist));
+}
+
+/*
+ * Release the arrays handed out by lt_stat_proc_list_create().
+ * Either pointer may be NULL.
+ */
+void
+lt_stat_proc_list_free(pid_t *plist, id_t *tlist)
+{
+	/* free(NULL) is a no-op, so the pointers need not be tested. */
+	free(plist);
+	free(tlist);
+}
+
+/*
+ * Return the name stored for the process collection of the given pid (set
+ * from the executable name when the collection was created), or NULL if the
+ * process is not tracked. The string remains owned by the collection.
+ */
+const char *
+lt_stat_proc_get_name(pid_t pid)
+{
+	lt_stat_collection_t *proc;
+
+	if (stat_system == NULL || stat_system->lt_sc_children == NULL) {
+		return (NULL);
+	}
+
+	proc = (lt_stat_collection_t *)g_hash_table_lookup(
+	    stat_system->lt_sc_children, LT_INT_TO_POINTER(pid));
+
+	return (proc == NULL ? NULL : proc->lt_sc_name);
+}
+
+/*
+ * Get the number of threads tracked for the given process.
+ * Returns 0 when the process is not tracked or has no thread table.
+ */
+int
+lt_stat_proc_get_nthreads(pid_t pid)
+{
+	lt_stat_collection_t *stat_p = NULL;
+
+	if (stat_system == NULL || stat_system->lt_sc_children == NULL) {
+		return (0);
+	}
+
+	stat_p = (lt_stat_collection_t *)g_hash_table_lookup(
+	    stat_system->lt_sc_children, LT_INT_TO_POINTER(pid));
+
+	/*
+	 * The child table is created lazily, so check it before asking for
+	 * its size, as the other walkers of the hierarchy do.
+	 */
+	if (stat_p != NULL && stat_p->lt_sc_children != NULL) {
+		return (g_hash_table_size(stat_p->lt_sc_children));
+	} else {
+		return (0);
+	}
+}
+
+/*
+ * Add one sample (type, value) to the synchronization object statistics of
+ * the given thread. The object is identified by its type and wait channel
+ * address. The sample is dropped when no collection is available for
+ * pid/tid or when lookup_sobj() yields no object.
+ */
+void
+lt_stat_update_sobj(pid_t pid, id_t tid, int stype,
+    unsigned long long wchan,
+    lt_stat_type_t type, uint64_t value)
+{
+	lt_stat_collection_t *leaf;
+	lt_sobj_id_t key;
+	lt_sobj_t *sobj;
+
+	leaf = get_stat_c(pid, tid);
+
+	if (leaf == NULL) {
+		return;
+	}
+
+	key.lt_soi_type = stype;
+	key.lt_soi_addr = wchan;
+	sobj = lookup_sobj(&key);
+
+	if (sobj != NULL) {
+		int cause_id = sobj->lt_so_cause_id;
+
+		update_stat_entry(leaf, cause_id, type, value,
+		    sobj->lt_so_string, GROUP_SOBJ);
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/latencytop/common/table.c	Mon Sep 28 13:53:34 2009 -0700
@@ -0,0 +1,840 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2008-2009, Intel Corporation.
+ * All Rights Reserved.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <memory.h>
+#include <stdio.h>
+#include <ctype.h>
+
+#include "latencytop.h"
+
+/*
+ * Structure that holds detail of a cause.
+ * Instances are created by new_cause() and released by free_cause().
+ */
+typedef struct {
+	/* Index of this cause in causes_array; handed out as "cause_id". */
+	int lt_c_cause_id;
+	/* Bitwise OR of CAUSE_FLAG_* values. */
+	int lt_c_flags;
+	/* Display name; heap string that is freed together with the cause. */
+	char *lt_c_name;
+} lt_cause_t;
+
+/*
+ * Structure that represents a matched cause.
+ * This is the value type of symbol_lookup_table.
+ */
+typedef struct  {
+	/* Priority given to the symbol in the translation rule. */
+	int lt_mt_priority;
+	/* Id of the cause the symbol maps to. */
+	int lt_mt_cause_id;
+} lt_match_t;
+
+/*
+ * All lt_cause_t that are created.
+ * cause_lookup maps a cause name ("char *") to its "lt_cause_t *"; it has no
+ * destroy functions, the causes are released through causes_array.
+ * causes_array holds every cause at the index given by its cause id, and
+ * causes_array_len is the number of causes created so far (also the next id).
+ */
+static GHashTable *cause_lookup = NULL;
+static GPtrArray *causes_array = NULL;
+static int causes_array_len = 0;
+
+/*
+ * This hash table maps a symbol to a cause.
+ * key is of type "char *" and value is of type "lt_match_t *".
+ * Both keys and values are released with free() when the table is destroyed.
+ */
+static GHashTable *symbol_lookup_table = NULL;
+
+/*
+ * The dtrace translation rules we get from the script: the concatenated
+ * TRANSLATE() macros generated by parse_config(), or NULL if there are none.
+ */
+char *dtrans = NULL;
+
+/*
+ * These structures are only used inside .trans parser.
+ */
+
+/* One generated D macro, kept per entry/return probe pair. */
+typedef struct {
+	/* Priority of the rule the macro text was generated from. */
+	int lt_dm_priority;
+	/* Heap string holding the "TRANSLATE(...)" line for the script. */
+	char *lt_dm_macro;
+} lt_dmacro_t;
+
+/* State collected while the configuration is being parsed. */
+typedef struct {
+	/* Names (heap strings) of the causes named by disable_cause. */
+	GSequence *lt_pr_cmd_disable;
+	/* Maps "<entry probe> <return probe>" to its "lt_dmacro_t *". */
+	GHashTable *lt_pr_dmacro;
+} lt_parser_t;
+
+/*
+ * Release a cause together with its name. The second argument exists only
+ * so that the function can be used as a GFunc; it is ignored.
+ */
+/* ARGSUSED */
+static void
+free_cause(lt_cause_t *cause, void *user)
+{
+	char *name;
+
+	g_assert(cause != NULL && cause->lt_c_name != NULL);
+
+	name = cause->lt_c_name;
+	free(name);
+	free(cause);
+}
+
+/*
+ * Release a D macro record together with its macro text.
+ */
+static void
+free_dmacro(lt_dmacro_t *d)
+{
+	char *text;
+
+	g_assert(d->lt_dm_macro != NULL);
+
+	text = d->lt_dm_macro;
+	free(text);
+	free(d);
+}
+
+/*
+ * Create a cause with the given name and flags and append it to
+ * causes_array. The new cause gets the next free cause id. The "name"
+ * pointer is stored as is (it is later freed by free_cause()).
+ * The cause is not entered into cause_lookup; that is left to the caller.
+ */
+static lt_cause_t *
+new_cause(char *name, int flags)
+{
+	lt_cause_t *cause;
+
+	g_assert(name != NULL);
+
+	cause = (lt_cause_t *)lt_malloc(sizeof (*cause));
+	cause->lt_c_cause_id = causes_array_len;
+	cause->lt_c_name = name;
+	cause->lt_c_flags = flags;
+
+	g_ptr_array_add(causes_array, cause);
+	causes_array_len += 1;
+
+	return (cause);
+}
+
+/*
+ * Mark the cause named cause_str in cause_table with CAUSE_FLAG_DISABLED.
+ * A name that is not in the table is silently ignored.
+ */
+static void
+disable_cause(char *cause_str, GHashTable *cause_table)
+{
+	lt_cause_t *target;
+
+	target = (lt_cause_t *)g_hash_table_lookup(cause_table, cause_str);
+
+	if (target == NULL) {
+		return;
+	}
+
+	target->lt_c_flags |= CAUSE_FLAG_DISABLED;
+}
+
+/*
+ * Helper function that reads the next line from a character array.
+ *
+ * Characters are copied from mem, starting at *index, into line until a
+ * '\r' or '\n' has been copied (the terminator is kept), mem is exhausted,
+ * or line_len - 1 characters have been stored. The result is always NUL
+ * terminated and *index is advanced past the copied characters.
+ *
+ * Returns 1 if a line was produced, 0 if there is nothing left to read or
+ * either length is not positive.
+ */
+static int
+read_line_from_mem(const char *mem, int mem_len, char *line, int line_len,
+    int *index)
+{
+	int pos;
+	int out = 0;
+
+	g_assert(mem != NULL && line != NULL && index != NULL);
+
+	if (line_len <= 0 || mem_len <= 0 || *index >= mem_len) {
+		return (0);
+	}
+
+	pos = *index;
+
+	/* Leave room for the terminating NUL. */
+	while (out < line_len - 1 && pos < mem_len) {
+		char ch = mem[pos++];
+
+		line[out++] = ch;
+
+		if (ch == '\r' || ch == '\n') {
+			break;
+		}
+	}
+
+	line[out] = '\0';
+	*index = pos;
+
+	return (1);
+}
+
+/*
+ * Parse special command from configuration file. "begin" points at the
+ * command text, i.e. just past the flag character that introduced the line.
+ * Special command has the following format :
+ *
+ *	disable_cause <cause name>
+ *
+ * The cause name is copied and queued on parser->lt_pr_cmd_disable.
+ * The buffer at "begin" is modified while parsing.
+ *
+ * Returns 0 on success (an empty command is accepted), or -1 after
+ * reporting an unknown command or a missing argument.
+ */
+static int
+parse_config_cmd(char *begin, lt_parser_t *parser)
+{
+	char *tmp;
+	char *arg;
+	char old_chr = 0;
+
+	/*
+	 * disable_cause  FSFlush Daemon
+	 * ^
+	 */
+	if (*begin == '\0') {
+		return (0);
+	}
+
+	/* isspace() needs an unsigned char value; plain char may be negative */
+	for (tmp = begin;
+	    *tmp != '\0' && !isspace((unsigned char)*tmp);
+	    ++tmp) {
+	}
+	old_chr = *tmp;
+	*tmp = '\0';
+
+	if (strcmp("disable_cause", begin) == 0) {
+		if (old_chr == '\0') {
+			/* Must have an argument */
+			lt_display_error(
+			    "Invalid command format: %s\n",
+			    begin);
+			return (-1);
+		}
+
+		arg = tmp+1;
+		while (isspace((unsigned char)*arg)) {
+			++arg;
+		}
+
+		if (*arg == '\0') {
+			/* Only white space followed the command name */
+			lt_display_error(
+			    "Invalid command format: %s\n",
+			    begin);
+			return (-1);
+		}
+
+		g_sequence_append(parser->lt_pr_cmd_disable,
+		    lt_strdup(arg));
+	} else   {
+		/* Show the whole command in the error message */
+		*tmp = old_chr;
+		lt_display_error(
+		    "Unknown command: %s\n", begin);
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Parse symbol translation from configuration file. Symbol translation
+ * has the following format :
+ *
+ *	<priority> <symbol name> <cause>
+ *
+ * The cause is created if it has not been seen before, and the symbol is
+ * entered into symbol_lookup_table. A rule for a symbol that is already
+ * mapped with a higher priority is ignored. The buffer at "begin" is
+ * modified while parsing.
+ *
+ * Returns 0 on success or -1 if the line is malformed (this includes a
+ * priority of 0).
+ */
+static int
+parse_sym_trans(char *begin)
+{
+	int priority = 0;
+	char *match;
+	char *match_dup;
+	char *cause_str;
+	lt_cause_t *cause;
+	lt_match_t *match_entry;
+	char *tmp;
+
+	/*
+	 * 10	genunix`pread			Syscall pread
+	 * ^
+	 */
+	priority = strtol(begin, &tmp, 10);
+
+	if (tmp == begin || priority == 0) {
+		return (-1);
+	}
+
+	begin = tmp;
+
+	/*
+	 * 10	genunix`pread			Syscall pread
+	 * --^
+	 */
+
+	/*
+	 * isspace() is only defined for unsigned char values (and EOF), so
+	 * the possibly negative plain char is converted first.
+	 */
+	if (!isspace((unsigned char)*begin)) {
+		/* At least one space char after <priority> */
+		return (-1);
+	}
+
+	while (isspace((unsigned char)*begin)) {
+		++begin;
+	}
+
+	if (*begin == 0) {
+		return (-1);
+	}
+
+	/*
+	 * 10	genunix`pread			Syscall pread
+	 * -----^
+	 */
+	for (tmp = begin;
+	    *tmp != '\0' && !isspace((unsigned char)*tmp);
+	    ++tmp) {
+	}
+
+	if (*tmp == '\0') {
+		return (-1);
+	}
+
+	*tmp = '\0';
+	match = begin;
+
+	/* Check if we have mapped this function before. */
+	match_entry = (lt_match_t *)
+	    g_hash_table_lookup(symbol_lookup_table, match);
+
+	if (match_entry != NULL &&
+	    HIGHER_PRIORITY(match_entry->lt_mt_priority, priority)) {
+		/* We already have a higher entry. Ignore this. */
+		return (0);
+	}
+
+	begin = tmp + 1;
+
+	/*
+	 * 10	genunix`pread			Syscall pread
+	 * -------------------------------------^
+	 */
+	while (isspace((unsigned char)*begin)) {
+		++begin;
+	}
+
+	if (*begin == 0) {
+		return (-1);
+	}
+
+	cause_str = begin;
+
+	/* Check if we have mapped this cause before. */
+	cause = (lt_cause_t *)
+	    g_hash_table_lookup(cause_lookup, cause_str);
+
+	if (cause == NULL) {
+		/* The copy serves as both the cause name and the table key */
+		char *cause_dup = lt_strdup(cause_str);
+		cause = new_cause(cause_dup, 0);
+		g_hash_table_insert(cause_lookup, cause_dup, cause);
+	}
+
+	match_entry = (lt_match_t *)lt_malloc(sizeof (lt_match_t));
+	match_entry->lt_mt_priority = priority;
+	match_entry->lt_mt_cause_id = cause->lt_c_cause_id;
+	match_dup = lt_strdup(match);
+
+	/* Replaces (and releases) any previous entry for this symbol */
+	g_hash_table_insert(symbol_lookup_table, match_dup,
+	    match_entry);
+
+	return (0);
+}
+
+/*
+ * Parse D macro. D macros have the following format :
+ *
+ *	<priority> <entry probe> <return probe> <cause>
+ *
+ * The cause is created if it has not been seen before. A TRANSLATE() macro
+ * line is generated and stored in parser->lt_pr_dmacro, keyed by the probe
+ * pair; for a pair that is already present, the macro is replaced only when
+ * the new rule has a greater priority. The buffer at "begin" is modified
+ * while parsing.
+ *
+ * Returns 0 on success or -1 if the line is malformed (this includes a
+ * priority of 0).
+ */
+static int
+parse_dmacro(char *begin, lt_parser_t *parser)
+{
+	int priority = 0;
+	char *entryprobe;
+	char *returnprobe;
+	char *cause_str;
+	char buf[512];
+	char probepair[512];
+	char *tmp = NULL;
+	lt_cause_t *cause;
+	lt_dmacro_t *dmacro;
+
+	/*
+	 * 10	syscall::pread:entry	syscall::pread:return	Syscall pread
+	 * ^
+	 */
+	priority = strtol(begin, &tmp, 10);
+
+	if (tmp == begin || priority == 0) {
+		return (-1);
+	}
+
+	begin = tmp;
+
+	/*
+	 * 10	syscall::pread:entry	syscall::pread:return	Syscall pread
+	 * --^
+	 */
+
+	/*
+	 * isspace() is only defined for unsigned char values (and EOF), so
+	 * the possibly negative plain char is converted first.
+	 */
+	while (isspace((unsigned char)*begin)) {
+		++begin;
+	}
+
+	if (*begin == 0) {
+		return (-1);
+	}
+
+	/*
+	 * 10	syscall::pread:entry	syscall::pread:return	Syscall pread
+	 * -----^
+	 */
+	for (tmp = begin;
+	    *tmp != '\0' && !isspace((unsigned char)*tmp);
+	    ++tmp) {
+	}
+
+	if (*tmp == '\0') {
+		return (-1);
+	}
+
+	*tmp = '\0';
+	entryprobe = begin;
+	begin = tmp + 1;
+
+	while (isspace((unsigned char)*begin)) {
+		++begin;
+	}
+
+	/*
+	 * 10	syscall::pread:entry	syscall::pread:return	Syscall pread
+	 * -----------------------------^
+	 */
+	for (tmp = begin;
+	    *tmp != '\0' && !isspace((unsigned char)*tmp);
+	    ++tmp) {
+	}
+
+	if (*tmp == '\0') {
+		return (-1);
+	}
+
+	*tmp = '\0';
+	returnprobe = begin;
+	begin = tmp + 1;
+
+	while (isspace((unsigned char)*begin)) {
+		++begin;
+	}
+
+	/*
+	 * 10	syscall::pread:entry	syscall::pread:return	Syscall pread
+	 * -----------------------------------------------------^
+	 */
+	if (*begin == 0) {
+		return (-1);
+	}
+
+	cause_str = begin;
+
+	/* Check if we have mapped this cause before. */
+	cause = (lt_cause_t *)
+	    g_hash_table_lookup(cause_lookup, cause_str);
+
+	if (cause == NULL) {
+		/* The copy serves as both the cause name and the table key */
+		char *cause_dup = lt_strdup(cause_str);
+		cause = new_cause(cause_dup, 0);
+		g_hash_table_insert(cause_lookup, cause_dup, cause);
+	}
+
+	(void) snprintf(buf, sizeof (buf), "\nTRANSLATE(%s, %s, \"%s\", %d)\n",
+	    entryprobe, returnprobe, cause_str, priority);
+
+	(void) snprintf(probepair, sizeof (probepair), "%s %s", entryprobe,
+	    returnprobe);
+
+	g_assert(cause != NULL);
+	g_assert(parser->lt_pr_dmacro != NULL);
+
+	dmacro = g_hash_table_lookup(parser->lt_pr_dmacro, probepair);
+
+	if (dmacro == NULL) {
+		dmacro = (lt_dmacro_t *)lt_malloc(sizeof (lt_dmacro_t));
+		dmacro->lt_dm_priority = priority;
+		dmacro->lt_dm_macro = lt_strdup(buf);
+		g_hash_table_insert(parser->lt_pr_dmacro, lt_strdup(probepair),
+		    dmacro);
+	} else if (dmacro->lt_dm_priority < priority) {
+		/* The new rule wins; swap in its macro text. */
+		free(dmacro->lt_dm_macro);
+		dmacro->lt_dm_priority = priority;
+		dmacro->lt_dm_macro = lt_strdup(buf);
+	}
+
+	return (0);
+}
+
+/*
+ * GHFunc style callback that collects TRANSLATE() macros: the macro text of
+ * dmacro is appended to str. The key is not used.
+ */
+/* ARGSUSED */
+static void
+genscript(void *key, lt_dmacro_t *dmacro, GString *str)
+{
+	const char *text = dmacro->lt_dm_macro;
+
+	(void) g_string_append(str, text);
+}
+
+/*
+ * Main logic that parses translation rules one line at a time,
+ * and creates a lookup table from it. Leading and trailing white space is
+ * ignored and empty lines are skipped. The first character of a line selects
+ * its type :
+ *
+ *	#				<--- comment
+ *	D <D macro rule>		<--- D macro (or "d")
+ *	S <Symbol translation>		<--- Symbols (or "s")
+ *	;disable_cause <cause>		<--- special command
+ *
+ * On success the generated TRANSLATE() macros are stored in dtrans (NULL if
+ * there are none), the causes named by disable_cause are flagged as
+ * disabled, and 0 is returned. On a malformed or overlong line an error is
+ * reported and -1 is returned; causes and symbols registered up to that
+ * point stay in the global tables.
+ */
+static int
+parse_config(const char *work, int work_len)
+{
+	char line[256];
+	int len;
+	char *begin, *end;
+	int current = 0;
+	lt_parser_t parser;
+	int ret = 0;
+	char flag;
+	GString *script;
+
+	cause_lookup = g_hash_table_new(g_str_hash, g_str_equal);
+	lt_check_null(cause_lookup);
+
+	parser.lt_pr_cmd_disable = g_sequence_new((GDestroyNotify)free);
+	lt_check_null(parser.lt_pr_cmd_disable);
+
+	parser.lt_pr_dmacro = g_hash_table_new_full(g_str_hash,
+	    g_str_equal, (GDestroyNotify)free, (GDestroyNotify)free_dmacro);
+	lt_check_null(parser.lt_pr_dmacro);
+
+	while (read_line_from_mem(work, work_len, line, sizeof (line),
+	    &current)) {
+		len = strlen(line);
+
+		if (len == 0) {
+			/*
+			 * Only possible if the data contains a NUL byte;
+			 * there is nothing to parse, and line[len-1] below
+			 * must not be evaluated.
+			 */
+			continue;
+		}
+
+		if (line[len-1] != '\n' && line[len-1] != '\r' &&
+		    current < work_len) {
+			lt_display_error("Configuration line too long.\n");
+			goto err;
+		}
+
+		begin = line;
+
+		/*
+		 * isspace() is only defined for unsigned char values (and
+		 * EOF), so the possibly negative plain char is converted.
+		 */
+		while (isspace((unsigned char)*begin)) {
+			++begin;
+		}
+
+		if (*begin == '\0') {
+			/* Ignore empty line */
+			continue;
+		}
+
+		/*
+		 * Delete trailing spaces. *begin is not a space, so the
+		 * backward scan stops there at the latest.
+		 */
+		end = begin + strlen(begin) - 1;
+
+		while (isspace((unsigned char)*end)) {
+			--end;
+		}
+
+		end[1] = '\0';
+
+		flag = *begin;
+		++begin;
+
+		switch (flag) {
+		case '#':
+			ret = 0;
+			break;
+		case ';':
+			ret = parse_config_cmd(begin, &parser);
+			break;
+		case 'D':
+		case 'd':
+			if (!isspace((unsigned char)*begin)) {
+				lt_display_error(
+				    "No space after flag char: %s\n", line);
+			}
+			while (isspace((unsigned char)*begin)) {
+				++begin;
+			}
+			ret = parse_dmacro(begin, &parser);
+			break;
+		case 'S':
+		case 's':
+			if (!isspace((unsigned char)*begin)) {
+				lt_display_error(
+				    "No space after flag char: %s\n", line);
+			}
+			while (isspace((unsigned char)*begin)) {
+				++begin;
+			}
+			ret = parse_sym_trans(begin);
+			break;
+		default:
+			ret = -1;
+			break;
+		}
+
+		if (ret != 0) {
+			lt_display_error(
+			    "Invalid configuration line: %s\n", line);
+			goto err;
+		}
+	}
+
+	script = g_string_new(NULL);
+	g_hash_table_foreach(parser.lt_pr_dmacro, (GHFunc)genscript, script);
+	dtrans = g_string_free(script, FALSE);
+
+	/* The macro text now lives in dtrans; release the parser's table. */
+	g_hash_table_destroy(parser.lt_pr_dmacro);
+
+	if (dtrans != NULL && strlen(dtrans) == 0) {
+		/* Allocated by GLib, so it is released with g_free() */
+		g_free(dtrans);
+		dtrans = NULL;
+	}
+
+	g_sequence_foreach(parser.lt_pr_cmd_disable, (GFunc)disable_cause,
+	    cause_lookup);
+	g_sequence_free(parser.lt_pr_cmd_disable);
+
+	return (0);
+
+err:
+	g_sequence_free(parser.lt_pr_cmd_disable);
+	g_hash_table_destroy(parser.lt_pr_dmacro);
+	return (-1);
+}
+
+/*
+ * Init function, called when latencytop starts.
+ * It loads translation rules from the configuration file. The configuration
+ * file defines some causes and symbols that match those causes.
+ *
+ * If g_config.lt_cfg_config_name is set, that file is read and used;
+ * otherwise the embedded rules are used when built with EMBED_CONFIGS.
+ * Any tables from a previous initialization are released first. Cause id 0
+ * ("Nothing") is always created and is disabled.
+ *
+ * Returns 0 on success, or -1 if the file cannot be opened or read, or if
+ * the rules cannot be parsed.
+ */
+int
+lt_table_init(void)
+{
+	char *config_loaded = NULL;
+	int config_loaded_len = 0;
+	const char *work = NULL;
+	int work_len = 0;
+	lt_cause_t *cause;
+
+#ifdef EMBED_CONFIGS
+	work = &latencytop_trans_start;
+	work_len = (int)(&latencytop_trans_end - &latencytop_trans_start);
+#endif
+
+	if (g_config.lt_cfg_config_name != NULL) {
+		FILE *fp;
+		long fsize = -1;
+
+		fp = fopen(g_config.lt_cfg_config_name, "r");
+
+		if (NULL == fp) {
+			lt_display_error(
+			    "Unable to open configuration file.\n");
+			return (-1);
+		}
+
+		/* Determine the file size; any failure is a read error. */
+		if (fseek(fp, 0, SEEK_END) == 0) {
+			fsize = ftell(fp);
+		}
+
+		config_loaded_len = (int)fsize;
+
+		if (fsize < 0 || (long)config_loaded_len != fsize ||
+		    fseek(fp, 0, SEEK_SET) != 0) {
+			lt_display_error(
+			    "Unable to read configuration file.\n");
+			(void) fclose(fp);
+			return (-1);
+		}
+
+		/*
+		 * A zero-byte translation is valid. Nothing is allocated
+		 * for it, as malloc(0) may return NULL.
+		 */
+		if (config_loaded_len != 0) {
+			config_loaded = (char *)lt_malloc(config_loaded_len);
+
+			if (fread(config_loaded, config_loaded_len, 1,
+			    fp) == 0) {
+				lt_display_error(
+				    "Unable to read configuration file.\n");
+				(void) fclose(fp);
+				free(config_loaded);
+				return (-1);
+			}
+		}
+
+		(void) fclose(fp);
+		(void) printf("Loaded configuration from %s\n",
+		    g_config.lt_cfg_config_name);
+
+		work = config_loaded;
+		work_len = config_loaded_len;
+	}
+
+	lt_table_deinit();
+	causes_array = g_ptr_array_new();
+	lt_check_null(causes_array);
+
+	/* 0 is not used, but it is kept as a place for bugs etc. */
+	cause = new_cause(lt_strdup("Nothing"), CAUSE_FLAG_DISABLED);
+	g_assert(cause->lt_c_cause_id == INVALID_CAUSE);
+
+	symbol_lookup_table = g_hash_table_new_full(
+	    g_str_hash, g_str_equal,
+	    (GDestroyNotify)free, (GDestroyNotify)free);
+	lt_check_null(symbol_lookup_table);
+
+	if (work_len != 0 && parse_config(work, work_len) != 0) {
+		/* Do not leak the file contents on a parse error */
+		free(config_loaded);
+		return (-1);
+	}
+
+	if (config_loaded != NULL) {
+		free(config_loaded);
+	}
+
+	return (0);
+}
+
+/*
+ * Some causes, such as "lock spinning", do not have stack trace. Names
+ * of such causes are explicitly specified in the D script.
+ * This function resolves a cause by name. If it is not known and
+ * auto_create is non-zero, the cause is created with the given flags and
+ * added to the global tables; a name starting with '#' additionally gets
+ * CAUSE_FLAG_HIDE_IN_SUMMARY.
+ * Returns the cause_id of the cause, or INVALID_CAUSE if it is not known
+ * and was not created.
+ */
+int
+lt_table_cause_from_name(char *name, int auto_create, int flags)
+{
+	lt_cause_t *found = NULL;
+	char *own_name;
+
+	if (cause_lookup != NULL) {
+		found = (lt_cause_t *)
+		    g_hash_table_lookup(cause_lookup, name);
+	} else {
+		/* First use: the table has to be set up before anything */
+		cause_lookup = g_hash_table_new(g_str_hash, g_str_equal);
+		lt_check_null(cause_lookup);
+	}
+
+	if (found != NULL) {
+		return (found->lt_c_cause_id);
+	}
+
+	if (!auto_create) {
+		return (INVALID_CAUSE);
+	}
+
+	if (name[0] == '#') {
+		flags |= CAUSE_FLAG_HIDE_IN_SUMMARY;
+	}
+
+	/* The copy serves as both the cause name and the table key. */
+	own_name = lt_strdup(name);
+	found = new_cause(own_name, flags);
+	g_hash_table_insert(cause_lookup, own_name, found);
+
+	return (found->lt_c_cause_id);
+}
+
+/*
+ * Try to map a symbol on stack to a known cause.
+ * module_func has the format "module_name`function_name". The whole string
+ * is looked up first; if that fails, the part starting at the backquote
+ * (backquote included) is looked up.
+ * cause_id and priority are set only if a cause is found.
+ * If cause is found return 1, otherwise return 0.
+ */
+int
+lt_table_cause_from_stack(const char *module_func, int *cause_id, int *priority)
+{
+	lt_match_t *hit;
+
+	g_assert(module_func != NULL && cause_id != NULL && priority != NULL);
+
+	if (symbol_lookup_table == NULL) {
+		return (0);
+	}
+
+	hit = (lt_match_t *)
+	    g_hash_table_lookup(symbol_lookup_table, module_func);
+
+	if (hit == NULL) {
+		const char *tick = strchr(module_func, '`');
+
+		if (tick != NULL) {
+			hit = (lt_match_t *)
+			    g_hash_table_lookup(symbol_lookup_table, tick);
+		}
+	}
+
+	if (hit == NULL) {
+		return (0);
+	}
+
+	*cause_id = hit->lt_mt_cause_id;
+	*priority = hit->lt_mt_priority;
+
+	return (1);
+}
+
+/*
+ * Get the display name of a cause. cause_id is usually returned from
+ * lt_table_cause_from_stack() or lt_table_cause_from_name().
+ * Returns NULL if cause_id is out of range. The string remains owned by the
+ * cause table.
+ */
+const char *
+lt_table_get_cause_name(int cause_id)
+{
+	lt_cause_t *cause = NULL;
+
+	if (cause_id >= 0 && cause_id < causes_array_len) {
+		cause = (lt_cause_t *)g_ptr_array_index(causes_array,
+		    cause_id);
+	}
+
+	return (cause != NULL ? cause->lt_c_name : NULL);
+}
+
+/*
+ * Check cause flag: returns the flags of the cause masked with "flag", so
+ * the result is non-zero if any of the requested bits is set.
+ * If CAUSE_ALL_FLAGS is passed in, all flags are returned.
+ * Returns 0 if cause_id is out of range.
+ */
+int
+lt_table_get_cause_flag(int cause_id, int flag)
+{
+	lt_cause_t *cause = NULL;
+
+	if (cause_id >= 0 && cause_id < causes_array_len) {
+		cause = (lt_cause_t *)g_ptr_array_index(causes_array,
+		    cause_id);
+	}
+
+	return (cause != NULL ? (cause->lt_c_flags & flag) : 0);
+}
+
+/*
+ * Write the generated macros (dtrans), if any, to fp.
+ * Returns 0 on success or when there is nothing to write, -1 if the write
+ * fails.
+ */
+int
+lt_table_append_trans(FILE *fp)
+{
+	if (dtrans == NULL) {
+		return (0);
+	}
+
+	return (fwrite(dtrans, strlen(dtrans), 1, fp) == 1 ? 0 : -1);
+}
+
+/*
+ * Clean up function.
+ * Free the resources used for symbol table (symbols, causes etc.) and the
+ * generated macros, and reset the globals so that lt_table_init() can be
+ * called again.
+ */
+void
+lt_table_deinit(void)
+{
+	guint i;
+
+	if (symbol_lookup_table != NULL) {
+		g_hash_table_destroy(symbol_lookup_table);
+		symbol_lookup_table = NULL;
+	}
+
+	/* The name table owns nothing; the causes are freed further down. */
+	if (cause_lookup != NULL) {
+		g_hash_table_destroy(cause_lookup);
+		cause_lookup = NULL;
+	}
+
+	if (causes_array != NULL) {
+		for (i = 0; i < causes_array->len; ++i) {
+			free_cause((lt_cause_t *)
+			    g_ptr_array_index(causes_array, i), NULL);
+		}
+
+		(void) g_ptr_array_free(causes_array, TRUE);
+		causes_array = NULL;
+		causes_array_len = 0;
+	}
+
+	/* g_free(NULL) is a no-op, so dtrans need not be tested first. */
+	g_free(dtrans);
+	dtrans = NULL;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/latencytop/common/util.c	Mon Sep 28 13:53:34 2009 -0700
@@ -0,0 +1,312 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2008-2009, Intel Corporation.
+ * All Rights Reserved.
+ */
+
+#include <unistd.h>
+#include <libintl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <procfs.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "latencytop.h"
+
+/* Pipe that breaks the event loop (and exits early) */
+static int signal_pipe[2];
+
+/*
+ * Return the current time of day, expressed in milliseconds (1e-3 s).
+ */
+uint64_t
+lt_millisecond(void)
+{
+	struct timeval now;
+	(void) gettimeofday(&now, NULL);
+	return (now.tv_usec / 1000 + (uint64_t)now.tv_sec * 1000);
+}
+
+/* Terminate the program if p is NULL, i.e. an allocation failed. */
+void
+lt_check_null(void *p)
+{
+	if (p != NULL) {
+		return;
+	}
+
+	(void) fprintf(stderr, "Out of memory!\n");
+	g_assert(0);
+	exit(2);
+}
+
+/*
+ * malloc() wrapper: ends the program instead of returning NULL.
+ */
+void *
+lt_malloc(size_t size)
+{
+	void *mem;
+
+	mem = malloc(size);
+	lt_check_null(mem);
+	return (mem);
+}
+
+/*
+ * Allocate size bytes of zeroed memory; ends the program on failure.
+ * It is named "zalloc" because its signature is different from
+ * calloc() in stdlib.
+ */
+void *
+lt_zalloc(size_t size)
+{
+	void *mem;
+
+	mem = calloc(size, 1);
+	lt_check_null(mem);
+
+	return (mem);
+}
+
+/*
+ * strdup() wrapper: ends the program instead of returning NULL.
+ */
+char *
+lt_strdup(const char *str)
+{
+	char *copy;
+
+	copy = strdup(str);
+	lt_check_null(copy);
+	return (copy);
+}
+
+/*
+ * Current UTC time in asctime() form with trailing whitespace removed.
+ */
+void
+lt_time_str(char *buffer, int len)
+{
+	struct tm tms;
+	time_t t;
+	int i;
+
+	(void) time(&t);
+	(void) gmtime_r(&t, &tms);
+	/* On failure buffer holds no valid string: clear it and bail. */
+	if (asctime_r(&tms, buffer, len) == NULL) {
+		(void) memset(buffer, 0, (len > 0) ? len : 0);
+		return;
+	}
+	/* Cast: isspace() is undefined for negative char values. */
+	for (i = strlen(buffer) - 1; i > 0 &&
+	    isspace((unsigned char)buffer[i]); --i) {
+		buffer[i] = '\0';
+	}
+}
+
+/*
+ * Retrieves the process's executable name or arguments from /proc.
+ * Returns a copy made with lt_strdup(), or NULL if psinfo is unreadable.
+ */
+char *
+lt_get_proc_field(pid_t pid, lt_field_t field)
+{
+	char name[PATH_MAX];
+	int fd;
+	ssize_t ret;
+	psinfo_t psinfo;
+
+	(void) snprintf(name, PATH_MAX, "/proc/%d/psinfo", (int)pid);
+	fd = open(name, O_RDONLY);
+	if (fd == -1) {
+		return (NULL);
+	}
+
+	ret = read(fd, (char *)&psinfo, sizeof (psinfo_t));
+	(void) close(fd);
+	/* A short read would leave part of psinfo uninitialized. */
+	if (ret != (ssize_t)sizeof (psinfo_t)) {
+		return (NULL);
+	}
+
+	switch (field) {
+	case LT_FIELD_FNAME:
+		return (lt_strdup(psinfo.pr_fname));
+	case LT_FIELD_PSARGS:
+		return (lt_strdup(psinfo.pr_psargs));
+	}
+	return (NULL);
+}
+
+/*
+ * Fold one sample into entry: add to the counter or the running total,
+ * or raise the recorded maximum. Unknown types leave entry untouched.
+ */
+void
+lt_update_stat_value(lt_stat_data_t *entry,
+    lt_stat_type_t type, uint64_t value)
+{
+	if (type == LT_STAT_COUNT) {
+		entry->lt_s_count += value;
+		return;
+	}
+
+	if (type == LT_STAT_SUM) {
+		entry->lt_s_total += value;
+		return;
+	}
+
+	/* The maximum only ever moves upwards. */
+	if (type == LT_STAT_MAX && value > entry->lt_s_max) {
+		entry->lt_s_max = value;
+	}
+}
+
+/*
+ * Comparator that orders entries by lt_s_total, largest first.
+ */
+int
+lt_sort_by_total_desc(lt_stat_entry_t *a, lt_stat_entry_t *b)
+{
+	g_assert(a != NULL && b != NULL);
+	/*
+	 * Compare explicitly rather than returning the difference of the
+	 * totals: they are 64-bit, so the difference may not fit in an int.
+	 */
+	if (a->lt_se_data.lt_s_total < b->lt_se_data.lt_s_total) {
+		return (1);
+	}
+	if (a->lt_se_data.lt_s_total > b->lt_se_data.lt_s_total) {
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * Comparator that orders entries by lt_s_max, largest first.
+ */
+int
+lt_sort_by_max_desc(lt_stat_entry_t *a, lt_stat_entry_t *b)
+{
+	g_assert(a != NULL && b != NULL);
+
+	if (a->lt_se_data.lt_s_max < b->lt_se_data.lt_s_max) {
+		return (1);
+	}
+	if (a->lt_se_data.lt_s_max > b->lt_se_data.lt_s_max) {
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * Comparator that orders entries by lt_s_count, largest first.
+ */
+int
+lt_sort_by_count_desc(lt_stat_entry_t *a, lt_stat_entry_t *b)
+{
+	g_assert(a != NULL && b != NULL);
+
+	if (a->lt_se_data.lt_s_count < b->lt_se_data.lt_s_count) {
+		return (1);
+	}
+	if (a->lt_se_data.lt_s_count > b->lt_se_data.lt_s_count) {
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * Helper function to sort on average; a zero count averages to 0.
+ */
+int
+lt_sort_by_avg_desc(lt_stat_entry_t *a, lt_stat_entry_t *b)
+{
+	double avg_a, avg_b;
+
+	g_assert(a != NULL && b != NULL);
+
+	/* Dividing by a zero count yields inf or NaN; NaN breaks sorting. */
+	avg_a = (a->lt_se_data.lt_s_count == 0) ? 0 :
+	    (double)a->lt_se_data.lt_s_total / a->lt_se_data.lt_s_count;
+	avg_b = (b->lt_se_data.lt_s_count == 0) ? 0 :
+	    (double)b->lt_se_data.lt_s_total / b->lt_se_data.lt_s_count;
+
+	if (avg_b > avg_a) {
+		return (1);
+	}
+	return ((avg_b < avg_a) ? -1 : 0);
+}
+
+/* Create the signal/wakeup pipe; both fds are set to -1 on failure. */
+void
+lt_gpipe_init(void)
+{
+	if (pipe(signal_pipe) != 0) {
+		signal_pipe[0] = signal_pipe[1] = -1;
+	}
+}
+
+/* Close both ends of the pipe used in signal handler. */
+void
+lt_gpipe_deinit(void)
+{
+	int end;
+
+	for (end = 0; end < 2; end++)
+		(void) close(signal_pipe[end]);
+}
+
+/* Break early from the main loop by writing to the signal pipe. */
+void
+lt_gpipe_break(const char *ch)
+{
+	/* Exactly one byte, ch[0], is written; the result is ignored. */
+	(void) write(signal_pipe[1], ch, 1);
+}
+
+/* Return the read end of the signal pipe. */
+int
+lt_gpipe_readfd(void)
+{
+	return (signal_pipe[0]);
+}
+
+/*
+ * Check if the given file exists.
+ * Returns 1 when stat64() succeeds on name and 0 otherwise, so a path
+ * that cannot be examined (e.g. no permission) also counts as missing.
+ */
+int
+lt_file_exist(const char *name)
+{
+	struct stat64 sb;
+	int found;
+
+	found = (stat64(name, &sb) == 0) ? 1 : 0;
+	return (found);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/latencytop/i386/Makefile	Mon Sep 28 13:53:34 2009 -0700
@@ -0,0 +1,29 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2008-2009, Intel Corporation.
+# All Rights Reserved.
+#
+
+include ../Makefile.com
+# WRAPOPT is empty for this build; install needs all and $(ROOTPROG32).
+WRAPOPT =
+
+install: all $(ROOTPROG32)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/latencytop/sparcv9/Makefile	Mon Sep 28 13:53:34 2009 -0700
@@ -0,0 +1,30 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2008-2009, Intel Corporation.
+# All Rights Reserved.
+#
+
+include ../Makefile.com
+include ../../Makefile.cmd.64
+# WRAPOPT is -64 for this build; install needs all and $(ROOTPROG64).
+WRAPOPT = -64
+
+install: all $(ROOTPROG64)
--- a/usr/src/pkgdefs/Makefile	Mon Sep 28 14:54:22 2009 -0700
+++ b/usr/src/pkgdefs/Makefile	Mon Sep 28 13:53:34 2009 -0700
@@ -323,6 +323,7 @@
 	SUNWixgbe \
 	SUNWkrbr \
 	SUNWkrbu \
+	SUNWlatencytop \
 	SUNWldskint \
 	SUNWlibsasl \
 	SUNWllc \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWlatencytop/Makefile	Mon Sep 28 13:53:34 2009 -0700
@@ -0,0 +1,34 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2008-2009, Intel Corporation.
+# All Rights Reserved.
+#
+
+include ../Makefile.com
+
+.KEEP_STATE:
+# all needs $(FILES) and depend; install additionally needs pkg.
+all: $(FILES) depend
+install: all pkg
+
+include ../Makefile.targ
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWlatencytop/depend	Mon Sep 28 13:53:34 2009 -0700
@@ -0,0 +1,31 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2008-2009, Intel Corporation.
+# All Rights Reserved.
+#
+
+P SUNWcsu	Core Solaris, (Usr)
+P SUNWcslr	Core Solaris Libraries (Root)
+P SUNWdtrc	DTrace Clients
+P SUNWdtrp	DTrace Providers
+P SUNWGlib	GLIB - Library of useful routines for C programming
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWlatencytop/pkginfo.tmpl	Mon Sep 28 13:53:34 2009 -0700
@@ -0,0 +1,50 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2008-2009, Intel Corporation.
+# All Rights Reserved.
+#
+
+#
+# This required package information file describes characteristics of the
+# package, such as package abbreviation, full package name, package version,
+# and package architecture.
+#
+PKG="SUNWlatencytop"
+NAME="LatencyTOP tool"
+ARCH="ISA"
+VERSION="ONVERS,REV=0.0.0"
+CATEGORY="system"
+SUNW_PRODNAME="SunOS"
+SUNW_PRODVERS="RELEASE/VERSION"
+DESC="LatencyTOP tool"
+BASEDIR=/
+SUNW_PKGVERS="1.0"
+SUNW_PKGTYPE="usr"
+VENDOR="Sun Microsystems, Inc."
+HOTLINE="Please contact your local service provider"
+EMAIL=""
+MAXINST="1000"
+CLASSES="none"
+SUNW_PKG_ALLZONES="true"
+SUNW_PKG_HOLLOW="false"
+SUNW_PKG_THISZONE="false"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWlatencytop/prototype_com	Mon Sep 28 13:53:34 2009 -0700
@@ -0,0 +1,48 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2008-2009, Intel Corporation.
+# All Rights Reserved.
+#
+
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+
+#!search <pathname pathname ...>	# where to find pkg objects
+#!include <filename>			# include another 'prototype' file
+#!default <mode> <owner> <group>	# default used if not specified on entry
+#!<param>=<value>			# puts parameter in pkg environment
+
+# packaging files
+i pkginfo
+i copyright
+i depend
+#
+# source locations relative to the prototype file
+#
+# SUNWlatencytop
+#
+d none usr 0755 root sys
+d none usr/bin 0755 root bin
+l none usr/bin/latencytop=../../usr/lib/isaexec
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWlatencytop/prototype_i386	Mon Sep 28 13:53:34 2009 -0700
@@ -0,0 +1,54 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2008-2009, Intel Corporation.
+# All Rights Reserved.
+#
+
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+
+#!search <pathname pathname ...>	# where to find pkg objects
+#!include <filename>			# include another 'prototype' file
+#!default <mode> <owner> <group>	# default used if not specified on entry
+#!<param>=<value>			# puts parameter in pkg environment
+
+#
+# Include ISA independent files (prototype_com)
+#
+!include prototype_com
+#
+#
+#
+# List files which are I386 specific here
+#
+# source locations relative to the prototype file
+#
+#
+# SUNWlatencytop
+#
+d none usr/bin/i86 755 root bin
+f none usr/bin/i86/latencytop 555 root bin
+d none usr/bin/amd64 755 root bin
+f none usr/bin/amd64/latencytop 555 root bin
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWlatencytop/prototype_sparc	Mon Sep 28 13:53:34 2009 -0700
@@ -0,0 +1,52 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2008-2009, Intel Corporation.
+# All Rights Reserved.
+#
+
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+
+#!search <pathname pathname ...>	# where to find pkg objects
+#!include <filename>			# include another 'prototype' file
+#!default <mode> <owner> <group>	# default used if not specified on entry
+#!<param>=<value>			# puts parameter in pkg environment
+
+#
+# Include ISA independent files (prototype_com)
+#
+!include prototype_com
+#
+#
+#
+# List files which are SPARC specific here
+#
+# source locations relative to the prototype file
+#
+#
+# SUNWlatencytop
+#
+d none usr/bin/sparcv9 755 root bin
+f none usr/bin/sparcv9/latencytop 555 root bin