Mercurial > illumos > illumos-gate
changeset 13451:4349f1403dad
1393 dis does not handle sections with unassigned virtual addresses well
1409 dis is overly restrictive when looking for its next symbol
Reviewed by: Rich Lowe <richlowe@richlowe.net>
Reviewed by: Eric Schrock <eric.schrock@delphix.com>
Approved by: Albert Lee <trisk@nexenta.com>
author | Jason King <jason.brian.king+illumoshg@gmail.com> |
---|---|
date | Wed, 14 Sep 2011 11:28:33 -0500 |
parents | 911d80518e4e |
children | 6bec9720e054 |
files | usr/src/cmd/dis/dis_main.c usr/src/cmd/dis/dis_target.c usr/src/cmd/dis/dis_target.h |
diffstat | 3 files changed, 161 insertions(+), 40 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/cmd/dis/dis_main.c Tue Sep 13 12:11:38 2011 -0400 +++ b/usr/src/cmd/dis/dis_main.c Wed Sep 14 11:28:33 2011 -0500 @@ -22,10 +22,10 @@ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * + * Copyright 2011 Jason King. All rights reserved. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <ctype.h> #include <getopt.h> #include <stdio.h> @@ -108,6 +108,7 @@ char buf[BUFSIZE]; char symbuf[BUFSIZE]; const char *symbol; + const char *last_symbol; off_t symoffset; int i; int bytesperline; @@ -125,22 +126,34 @@ if ((bytesperline = dis_max_instrlen(dhp)) > 6) bytesperline = 6; + symbol = NULL; + while (addr < db.db_addr + db.db_size) { if (dis_disassemble(dhp, addr, buf, BUFSIZE) != 0) { +#if defined(__sparc) /* - * If we encounter an invalid opcode, we just - * print "*** invalid opcode ***" at that first bad - * instruction and continue with printing the rest - * of the instruction stream as hex data, - * We then find the next valid symbol in the section, - * and disassemble from there. + * Since sparc instructions are fixed size, we + * always know the address of the next instruction */ + (void) snprintf(buf, sizeof (buf), + "*** invalid opcode ***"); + db.db_nextaddr = addr + 4; + +#else off_t next; (void) snprintf(buf, sizeof (buf), "*** invalid opcode ***"); + /* + * On architectures with variable sized instructions + * we have no way to figure out where the next + * instruction starts if we encounter an invalid + * instruction. Instead we print the rest of the + * instruction stream as hex until we reach the + * next valid symbol in the section. + */ if ((next = dis_tgt_next_symbol(tgt, addr)) == 0) { db.db_nextaddr = db.db_addr + db.db_size; } else { @@ -150,6 +163,7 @@ else db.db_nextaddr = addr + next; } +#endif } /* @@ -168,12 +182,19 @@ * based on the maximum width that the current symbol can be. * This at least produces text aligned within each function. 
*/ + last_symbol = symbol; symbol = dis_tgt_lookup(tgt, addr, &symoffset, 1, &symsize, &isfunc); - /* Get the maximum length for this symbol */ - getsymname(addr, symbol, symsize, symbuf, sizeof (symbuf)); - symwidth = MAX(strlen(symbuf), MINSYMWIDTH); + if (symbol == NULL) { + symbol = dis_find_section(tgt, addr, &symoffset); + symsize = symoffset; + } + if (symbol != last_symbol) + getsymname(addr, symbol, symsize, symbuf, + sizeof (symbuf)); + + symwidth = MAX(symwidth, strlen(symbuf)); getsymname(addr, symbol, symoffset, symbuf, sizeof (symbuf)); /* @@ -472,21 +493,23 @@ break; case EM_SPARC32PLUS: + { + uint64_t flags = ehdr.e_flags & EF_SPARC_32PLUS_MASK; + if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 || ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { warn("invalid E_IDENT field for SPARC object"); return; } - switch (ehdr.e_flags & EF_SPARC_32PLUS_MASK) { - case (EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | - EF_SPARC_SUN_US3): - case (EF_SPARC_32PLUS | EF_SPARC_SUN_US1): + if (flags != 0 && + (flags & (EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | + EF_SPARC_SUN_US3)) != EF_SPARC_32PLUS) g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI; - default: + else g_flags |= DIS_SPARC_V9; - } break; + } case EM_SPARCV9: if (ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
--- a/usr/src/cmd/dis/dis_target.c Tue Sep 13 12:11:38 2011 -0400 +++ b/usr/src/cmd/dis/dis_target.c Wed Sep 14 11:28:33 2011 -0500 @@ -21,6 +21,8 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * + * Copyright 2011 Jason King. All rights reserved. */ #include <assert.h> @@ -34,6 +36,8 @@ #include <sys/fcntl.h> #include <sys/stat.h> +#include <sys/sysmacros.h> +#include <sys/types.h> #include "dis_target.h" #include "dis_util.h" @@ -58,6 +62,19 @@ } sym_entry_t; /* + * Create a map of the virtual address ranges of every section. This will + * allow us to create dummpy mappings for unassigned addresses. Otherwise + * multiple sections with unassigned addresses will appear to overlap and + * mess up symbol resolution (which uses the virtual address). + */ +typedef struct dis_shnmap { + const char *dm_name; /* name of section */ + uint64_t dm_start; /* virtual address of section */ + size_t dm_length; /* address length */ + boolean_t dm_mapped; /* did we assign the mapping */ +} dis_shnmap_t; + +/* * Target data structure. This structure keeps track of the ELF file * information, a few bits of pre-processed section index information, and * sorted versions of the symbol table. We also keep track of the last symbol @@ -75,6 +92,8 @@ int dt_symcount; /* # of symbol table entries */ struct dis_tgt *dt_next; /* next target (for archives) */ Elf_Arhdr *dt_arhdr; /* archive header (for archives) */ + dis_shnmap_t *dt_shnmap; /* section address map */ + size_t dt_shncount; /* # of sections in target */ }; /* @@ -105,17 +124,23 @@ #define IS_DATA_TYPE(tp) (((1 << (tp)) & DATA_TYPES) != 0) /* - * Pick out the best symbol to used based on the sections available in the - * target. We prefer SHT_SYMTAB over SHT_DYNSYM. + * Save the virtual address range for this section and select the + * best section to use as the symbol table. We prefer SHT_SYMTAB + * over SHT_DYNSYM. 
*/ /* ARGSUSED */ static void -get_symtab(dis_tgt_t *tgt, dis_scn_t *scn, void *data) +tgt_scn_init(dis_tgt_t *tgt, dis_scn_t *scn, void *data) { int *index = data; *index += 1; + tgt->dt_shnmap[*index].dm_name = scn->ds_name; + tgt->dt_shnmap[*index].dm_start = scn->ds_shdr.sh_addr; + tgt->dt_shnmap[*index].dm_length = scn->ds_shdr.sh_size; + tgt->dt_shnmap[*index].dm_mapped = B_FALSE; + /* * Prefer SHT_SYMTAB over SHT_DYNSYM */ @@ -292,6 +317,14 @@ continue; } + /* + * If we had to map this section, its symbol value + * also needs to be mapped. + */ + if (tgt->dt_shnmap[sym->se_shndx].dm_mapped) + sym->se_sym.st_value += + tgt->dt_shnmap[sym->se_shndx].dm_start; + sym++; } @@ -304,6 +337,40 @@ } /* + * Assign virtual address ranges for sections that need it + */ +static void +create_addrmap(dis_tgt_t *tgt) +{ + uint64_t addr; + int i; + + if (tgt->dt_shnmap == NULL) + return; + + /* find the greatest used address */ + for (addr = 0, i = 1; i < tgt->dt_shncount; i++) + if (tgt->dt_shnmap[i].dm_start > addr) + addr = tgt->dt_shnmap[i].dm_start + + tgt->dt_shnmap[i].dm_length; + + addr = P2ROUNDUP(addr, 0x1000); + + /* + * Assign section a starting address beyond the largest mapped section + * if no address was given. + */ + for (i = 1; i < tgt->dt_shncount; i++) { + if (tgt->dt_shnmap[i].dm_start != 0) + continue; + + tgt->dt_shnmap[i].dm_start = addr; + tgt->dt_shnmap[i].dm_mapped = B_TRUE; + addr = P2ROUNDUP(addr + tgt->dt_shnmap[i].dm_length, 0x1000); + } +} + +/* * Create a target backed by an ELF file. 
*/ dis_tgt_t * @@ -393,9 +460,14 @@ return (NULL); } + current->dt_shnmap = safe_malloc(sizeof (dis_shnmap_t) * + ehdr.e_shnum); + current->dt_shncount = ehdr.e_shnum; + idx = 0; - dis_tgt_section_iter(current, get_symtab, &idx); + dis_tgt_section_iter(current, tgt_scn_init, &idx); + create_addrmap(current); if (current->dt_symidx != 0) construct_symtab(current); @@ -488,6 +560,28 @@ } /* + * Given an address, return the section it is in and set the offset within + * the section. + */ +const char * +dis_find_section(dis_tgt_t *tgt, uint64_t addr, off_t *offset) +{ + int i; + + for (i = 1; i < tgt->dt_shncount; i++) { + if ((addr >= tgt->dt_shnmap[i].dm_start) && + (addr < tgt->dt_shnmap[i].dm_start + + tgt->dt_shnmap[i].dm_length)) { + *offset = addr - tgt->dt_shnmap[i].dm_start; + return (tgt->dt_shnmap[i].dm_name); + } + } + + *offset = 0; + return (NULL); +} + +/* * Given an address, returns the name of the corresponding symbol, as well as * the offset within that symbol. If no matching symbol is found, then NULL is * returned. @@ -577,29 +671,20 @@ /* * Given an address, return the starting offset of the next symbol in the file. - * Relies on the fact that this is only used when we encounter a bad instruction - * in the input stream, so we know that the last symbol looked up will be in the - * cache. 
*/ off_t dis_tgt_next_symbol(dis_tgt_t *tgt, uint64_t addr) { - sym_entry_t *sym = tgt->dt_symcache; - uint64_t start; - - /* make sure the cached symbol and address are valid */ - if (sym == NULL || addr < sym->se_sym.st_value || - addr >= sym->se_sym.st_value + sym->se_sym.st_size) - return (0); + sym_entry_t *sym; - start = sym->se_sym.st_value; + for (sym = tgt->dt_symcache; + sym != tgt->dt_symtab + tgt->dt_symcount; + sym++) { + if (sym->se_sym.st_value >= addr) + return (sym->se_sym.st_value - addr); + } - /* find the next symbol */ - while (sym != tgt->dt_symtab + tgt->dt_symcount && - sym->se_sym.st_value == start) - sym++; - - return (sym->se_sym.st_value - addr); + return (0); } /* @@ -635,6 +720,15 @@ continue; } + /* + * dis_tgt_section_iter is also used before the section map + * is initialized, so only check when we need to. If the + * section map is uninitialized, it will return 0 and have + * no net effect. + */ + if (sdata.ds_shdr.sh_addr == 0) + sdata.ds_shdr.sh_addr = tgt->dt_shnmap[idx].dm_start; + func(tgt, &sdata, data); } } @@ -740,6 +834,9 @@ continue; } + if (tgt->dt_shnmap[sym->se_shndx].dm_mapped) + shdr.sh_addr = tgt->dt_shnmap[sym->se_shndx].dm_start; + /* * Verify that the address lies within the section that we think * it does.
--- a/usr/src/cmd/dis/dis_target.h Tue Sep 13 12:11:38 2011 -0400 +++ b/usr/src/cmd/dis/dis_target.h Wed Sep 14 11:28:33 2011 -0500 @@ -22,13 +22,13 @@ /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * + * Copyright 2011 Jason King. All rights reserved. */ #ifndef _DIS_TARGET_H #define _DIS_TARGET_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -50,6 +50,7 @@ void dis_tgt_destroy(dis_tgt_t *); const char *dis_tgt_lookup(dis_tgt_t *, uint64_t, off_t *, int, size_t *, int *); +const char *dis_find_section(dis_tgt_t *, uint64_t, off_t *); const char *dis_tgt_name(dis_tgt_t *); const char *dis_tgt_member(dis_tgt_t *); void dis_tgt_ehdr(dis_tgt_t *, GElf_Ehdr *);