changeset 13451:4349f1403dad

1393 dis does not handle sections with unassigned virtual addresses well 1409 dis is overly restrictive when looking for its next symbol Reviewed by: Rich Lowe <richlowe@richlowe.net> Reviewed by: Eric Schrock <eric.schrock@delphix.com> Approved by: Albert Lee <trisk@nexenta.com>
author Jason King <jason.brian.king+illumoshg@gmail.com>
date Wed, 14 Sep 2011 11:28:33 -0500
parents 911d80518e4e
children 6bec9720e054
files usr/src/cmd/dis/dis_main.c usr/src/cmd/dis/dis_target.c usr/src/cmd/dis/dis_target.h
diffstat 3 files changed, 161 insertions(+), 40 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/cmd/dis/dis_main.c	Tue Sep 13 12:11:38 2011 -0400
+++ b/usr/src/cmd/dis/dis_main.c	Wed Sep 14 11:28:33 2011 -0500
@@ -22,10 +22,10 @@
 /*
  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright 2011 Jason King.  All rights reserved.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <ctype.h>
 #include <getopt.h>
 #include <stdio.h>
@@ -108,6 +108,7 @@
 	char buf[BUFSIZE];
 	char symbuf[BUFSIZE];
 	const char *symbol;
+	const char *last_symbol;
 	off_t symoffset;
 	int i;
 	int bytesperline;
@@ -125,22 +126,34 @@
 	if ((bytesperline = dis_max_instrlen(dhp)) > 6)
 		bytesperline = 6;
 
+	symbol = NULL;
+
 	while (addr < db.db_addr + db.db_size) {
 
 		if (dis_disassemble(dhp, addr, buf, BUFSIZE) != 0) {
+#if defined(__sparc)
 			/*
-			 * If we encounter an invalid opcode, we just
-			 * print "*** invalid opcode ***" at that first bad
-			 * instruction and continue with printing the rest
-			 * of the instruction stream as hex data,
-			 * We then find the next valid symbol in the section,
-			 * and disassemble from there.
+			 * Since sparc instructions are fixed size, we
+			 * always know the address of the next instruction
 			 */
+			(void) snprintf(buf, sizeof (buf),
+			    "*** invalid opcode ***");
+			db.db_nextaddr = addr + 4;
+
+#else
 			off_t next;
 
 			(void) snprintf(buf, sizeof (buf),
 			    "*** invalid opcode ***");
 
+			/*
+			 * On architectures with variable sized instructions
+			 * we have no way to figure out where the next
+			 * instruction starts if we encounter an invalid
+			 * instruction.  Instead we print the rest of the
+			 * instruction stream as hex until we reach the
+			 * next valid symbol in the section.
+			 */
 			if ((next = dis_tgt_next_symbol(tgt, addr)) == 0) {
 				db.db_nextaddr = db.db_addr + db.db_size;
 			} else {
@@ -150,6 +163,7 @@
 				else
 					db.db_nextaddr = addr + next;
 			}
+#endif
 		}
 
 		/*
@@ -168,12 +182,19 @@
 		 * based on the maximum width that the current symbol can be.
 		 * This at least produces text aligned within each function.
 		 */
+		last_symbol = symbol;
 		symbol = dis_tgt_lookup(tgt, addr, &symoffset, 1, &symsize,
 		    &isfunc);
-		/* Get the maximum length for this symbol */
-		getsymname(addr, symbol, symsize, symbuf, sizeof (symbuf));
-		symwidth = MAX(strlen(symbuf), MINSYMWIDTH);
+		if (symbol == NULL) {
+			symbol = dis_find_section(tgt, addr, &symoffset);
+			symsize = symoffset;
+		}
 
+		if (symbol != last_symbol)
+			getsymname(addr, symbol, symsize, symbuf,
+			    sizeof (symbuf));
+
+		symwidth = MAX(symwidth, strlen(symbuf));
 		getsymname(addr, symbol, symoffset, symbuf, sizeof (symbuf));
 
 		/*
@@ -472,21 +493,23 @@
 			break;
 
 		case EM_SPARC32PLUS:
+		{
+			uint64_t flags = ehdr.e_flags & EF_SPARC_32PLUS_MASK;
+
 			if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
 			    ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
 				warn("invalid E_IDENT field for SPARC object");
 				return;
 			}
 
-			switch (ehdr.e_flags & EF_SPARC_32PLUS_MASK) {
-			case (EF_SPARC_32PLUS | EF_SPARC_SUN_US1 |
-			    EF_SPARC_SUN_US3):
-			case (EF_SPARC_32PLUS | EF_SPARC_SUN_US1):
+			if (flags != 0 &&
+			    (flags & (EF_SPARC_32PLUS | EF_SPARC_SUN_US1 |
+			    EF_SPARC_SUN_US3)) != EF_SPARC_32PLUS)
 				g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
-			default:
+			else
 				g_flags |= DIS_SPARC_V9;
-			}
 			break;
+		}
 
 		case EM_SPARCV9:
 			if (ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
--- a/usr/src/cmd/dis/dis_target.c	Tue Sep 13 12:11:38 2011 -0400
+++ b/usr/src/cmd/dis/dis_target.c	Wed Sep 14 11:28:33 2011 -0500
@@ -21,6 +21,8 @@
 
 /*
  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright 2011 Jason King.  All rights reserved.
  */
 
 #include <assert.h>
@@ -34,6 +36,8 @@
 
 #include <sys/fcntl.h>
 #include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
 
 #include "dis_target.h"
 #include "dis_util.h"
@@ -58,6 +62,19 @@
 } sym_entry_t;
 
 /*
+ * Create a map of the virtual address ranges of every section.  This will
+ * allow us to create dummy mappings for unassigned addresses.  Otherwise
+ * multiple sections with unassigned addresses will appear to overlap and
+ * mess up symbol resolution (which uses the virtual address).
+ */
+typedef struct dis_shnmap {
+	const char 	*dm_name;	/* section name (copied from ds_name) */
+	uint64_t	dm_start;	/* start address: sh_addr, or assigned */
+	size_t		dm_length;	/* extent of the range (from sh_size) */
+	boolean_t	dm_mapped;	/* B_TRUE if dis assigned dm_start */
+} dis_shnmap_t;
+
+/*
  * Target data structure.  This structure keeps track of the ELF file
  * information, a few bits of pre-processed section index information, and
  * sorted versions of the symbol table.  We also keep track of the last symbol
@@ -75,6 +92,8 @@
 	int		dt_symcount;	/* # of symbol table entries */
 	struct dis_tgt	*dt_next;	/* next target (for archives) */
 	Elf_Arhdr	*dt_arhdr;	/* archive header (for archives) */
+	dis_shnmap_t	*dt_shnmap;	/* section address map */
+	size_t		dt_shncount;	/* # of sections in target */
 };
 
 /*
@@ -105,17 +124,23 @@
 #define	IS_DATA_TYPE(tp)	(((1 << (tp)) & DATA_TYPES) != 0)
 
 /*
- * Pick out the best symbol to used based on the sections available in the
- * target.  We prefer SHT_SYMTAB over SHT_DYNSYM.
+ * Save the virtual address range for this section and select the
+ * best section to use as the symbol table.  We prefer SHT_SYMTAB
+ * over SHT_DYNSYM.
  */
 /* ARGSUSED */
 static void
-get_symtab(dis_tgt_t *tgt, dis_scn_t *scn, void *data)
+tgt_scn_init(dis_tgt_t *tgt, dis_scn_t *scn, void *data)
 {
 	int *index = data;
 
 	*index += 1;
 
+	tgt->dt_shnmap[*index].dm_name = scn->ds_name;
+	tgt->dt_shnmap[*index].dm_start = scn->ds_shdr.sh_addr;
+	tgt->dt_shnmap[*index].dm_length = scn->ds_shdr.sh_size;
+	tgt->dt_shnmap[*index].dm_mapped = B_FALSE;
+
 	/*
 	 * Prefer SHT_SYMTAB over SHT_DYNSYM
 	 */
@@ -292,6 +317,14 @@
 			continue;
 		}
 
+		/*
+		 * If we had to map this section, its symbol value
+		 * also needs to be mapped.
+		 */
+		if (tgt->dt_shnmap[sym->se_shndx].dm_mapped)
+			sym->se_sym.st_value +=
+			    tgt->dt_shnmap[sym->se_shndx].dm_start;
+
 		sym++;
 	}
 
@@ -304,6 +337,40 @@
 }
 
 /*
+ * Assign virtual address ranges for sections that need it
+ */
+static void
+create_addrmap(dis_tgt_t *tgt)
+{
+	uint64_t addr;		/* next address to hand out */
+	int i;			/* section index; both loops start at 1 */
+
+	if (tgt->dt_shnmap == NULL)	/* no section map: nothing to do */
+		return;
+
+	/* find greatest used address; NOTE(review): compares start to end */
+	for (addr = 0, i = 1; i < tgt->dt_shncount; i++)
+		if (tgt->dt_shnmap[i].dm_start > addr)	/* addr = a prior end */
+			addr = tgt->dt_shnmap[i].dm_start +
+			    tgt->dt_shnmap[i].dm_length;
+
+	addr = P2ROUNDUP(addr, 0x1000);	/* begin on a 0x1000 boundary */
+
+	/*
+	 * Give every section whose start is 0 its own 0x1000-aligned range
+	 * above that address, and mark it dm_mapped.
+	 */
+	for (i = 1; i < tgt->dt_shncount; i++) {
+		if (tgt->dt_shnmap[i].dm_start != 0)	/* already has one */
+			continue;
+
+		tgt->dt_shnmap[i].dm_start = addr;
+		tgt->dt_shnmap[i].dm_mapped = B_TRUE;
+		addr = P2ROUNDUP(addr + tgt->dt_shnmap[i].dm_length, 0x1000);
+	}
+}
+
+/*
  * Create a target backed by an ELF file.
  */
 dis_tgt_t *
@@ -393,9 +460,14 @@
 			return (NULL);
 		}
 
+		current->dt_shnmap = safe_malloc(sizeof (dis_shnmap_t) *
+		    ehdr.e_shnum);
+		current->dt_shncount = ehdr.e_shnum;
+
 		idx = 0;
-		dis_tgt_section_iter(current, get_symtab, &idx);
+		dis_tgt_section_iter(current, tgt_scn_init, &idx);
 
+		create_addrmap(current);
 		if (current->dt_symidx != 0)
 			construct_symtab(current);
 
@@ -488,6 +560,28 @@
 }
 
 /*
+ * Given an address, return the section it is in and set the offset within
+ * the section.
+ */
+const char *
+dis_find_section(dis_tgt_t *tgt, uint64_t addr, off_t *offset)
+{
+	int i;	/* section index; entry 0 is not searched */
+
+	for (i = 1; i < tgt->dt_shncount; i++) {	/* first match wins */
+		if ((addr >= tgt->dt_shnmap[i].dm_start) &&	/* [start, */
+		    (addr < tgt->dt_shnmap[i].dm_start +	/* start + */
+		    tgt->dt_shnmap[i].dm_length)) {		/* length) */
+			*offset = addr - tgt->dt_shnmap[i].dm_start;
+			return (tgt->dt_shnmap[i].dm_name);
+		}
+	}
+
+	*offset = 0;	/* no section range contains addr */
+	return (NULL);
+}
+
+/*
  * Given an address, returns the name of the corresponding symbol, as well as
  * the offset within that symbol.  If no matching symbol is found, then NULL is
  * returned.
@@ -577,29 +671,20 @@
 
 /*
  * Given an address, return the starting offset of the next symbol in the file.
- * Relies on the fact that this is only used when we encounter a bad instruction
- * in the input stream, so we know that the last symbol looked up will be in the
- * cache.
  */
 off_t
 dis_tgt_next_symbol(dis_tgt_t *tgt, uint64_t addr)
 {
-	sym_entry_t *sym = tgt->dt_symcache;
-	uint64_t start;
-
-	/* make sure the cached symbol and address are valid */
-	if (sym == NULL || addr < sym->se_sym.st_value ||
-	    addr >= sym->se_sym.st_value + sym->se_sym.st_size)
-		return (0);
+	sym_entry_t *sym;	/* cursor, walks forward from the cache */
 
-	start = sym->se_sym.st_value;
+	for (sym = tgt->dt_symcache;	/* NOTE(review): NULL no longer tested */
+	    sym != tgt->dt_symtab + tgt->dt_symcount;	/* one past last entry */
+	    sym++) {
+		if (sym->se_sym.st_value >= addr)	/* first at/after addr */
+			return (sym->se_sym.st_value - addr);
+	}
 
-	/* find the next symbol */
-	while (sym != tgt->dt_symtab + tgt->dt_symcount &&
-	    sym->se_sym.st_value == start)
-		sym++;
-
-	return (sym->se_sym.st_value - addr);
+	return (0);	/* reached the end of the table without a match */
 }
 
 /*
@@ -635,6 +720,15 @@
 			continue;
 		}
 
+		/*
+		 * dis_tgt_section_iter is also used before the section map
+		 * is initialized, so only check when we need to.  If the
+		 * section map is uninitialized, it will return 0 and have
+		 * no net effect.
+		 */
+		if (sdata.ds_shdr.sh_addr == 0)
+			sdata.ds_shdr.sh_addr = tgt->dt_shnmap[idx].dm_start;
+
 		func(tgt, &sdata, data);
 	}
 }
@@ -740,6 +834,9 @@
 			continue;
 		}
 
+		if (tgt->dt_shnmap[sym->se_shndx].dm_mapped)
+			shdr.sh_addr = tgt->dt_shnmap[sym->se_shndx].dm_start;
+
 		/*
 		 * Verify that the address lies within the section that we think
 		 * it does.
--- a/usr/src/cmd/dis/dis_target.h	Tue Sep 13 12:11:38 2011 -0400
+++ b/usr/src/cmd/dis/dis_target.h	Wed Sep 14 11:28:33 2011 -0500
@@ -22,13 +22,13 @@
 /*
  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright 2011 Jason King.  All rights reserved.
  */
 
 #ifndef	_DIS_TARGET_H
 #define	_DIS_TARGET_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -50,6 +50,7 @@
 void dis_tgt_destroy(dis_tgt_t *);
 const char *dis_tgt_lookup(dis_tgt_t *, uint64_t, off_t *, int, size_t *,
     int *);
+const char *dis_find_section(dis_tgt_t *, uint64_t, off_t *);
 const char *dis_tgt_name(dis_tgt_t *);
 const char *dis_tgt_member(dis_tgt_t *);
 void dis_tgt_ehdr(dis_tgt_t *, GElf_Ehdr *);