517a7adb11
hwpmc has been utterly broken for userspace binaries, and has been labeling all samples from userspace binaries as dubious frames. The issues are that: -The check for ph.p_offset & (-ph.p_align) == 0 was mostly bogus. The intent was to ignore all executable segments other than the first, which when using BFD appeared in the first page, but with current LLD a read-only data segment appears before the executable segment, pushing the latter into the second page or later. This meant no executable segment was ever found, and thus pi_vaddr remained 0. Instead of relying on BFD's layout, track whether we've seen an executable segment explicitly with a local bool. -Shared libraries were not parsing the segments to calculate pi_vaddr, resulting in it always being 0. Again, when using BFD, the executable segment started at the first page, and so pi_vaddr was genuinely meant to be 0, but not with LLD's current layout. This meant that pmcstat_image_link's offset calculation gave the base address of the segment in memory, rather than the base address of the whole library in memory, and so when adding that to pi_start/pi_end to get the range of the executable sections in memory it double-counted the offset of the first executable segment within the library. Thus we need to do the exact same parsing for ET_DYN as we do for ET_EXEC, which is simpler to write as special-casing ET_REL to not look for segments. Note that, whilst PT_INTERP isn't needed for shared libraries, it will be for PIEs, which pmcstat still fails to handle due to not knowing the base address of the PIE; we get the base address for libraries by MAP_IN events, and for rtld by virtue of the process's entry address being rtld's, but have no equivalent for the executable. Fixes courtesy of jrtc27@. Reviewed by: jrtc27, jhb (earlier version) Differential Revision: https://reviews.freebsd.org/D33055 Sponsored by: Netflix
558 lines
14 KiB
C
558 lines
14 KiB
C
/*-
|
|
* Copyright (c) 2003-2008 Joseph Koshy
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
#include <sys/types.h>
|
|
#include <sys/cpuset.h>
|
|
#include <sys/param.h>
|
|
#include <sys/endian.h>
|
|
#include <sys/pmc.h>
|
|
#include <sys/sysctl.h>
|
|
#include <sys/imgact_aout.h>
|
|
#include <sys/imgact_elf.h>
|
|
|
|
#include <netinet/in.h>
|
|
|
|
#include <assert.h>
|
|
#include <err.h>
|
|
#include <fcntl.h>
|
|
#include <pmc.h>
|
|
#include <pmclog.h>
|
|
#include <stdbool.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <sysexits.h>
|
|
#include <unistd.h>
|
|
|
|
#include "libpmcstat.h"
|
|
|
|
/*
 * Smaller / larger of two values.  These are plain function-like
 * macros: the selected argument is evaluated twice, so do not pass
 * operands that have side effects.
 */
#define min(A,B) (((A) < (B)) ? (A) : (B))
#define max(A,B) (((A) > (B)) ? (A) : (B))
|
|
|
|
/*
|
|
* Add the list of symbols in the given section to the list associated
|
|
* with the object.
|
|
*/
|
|
void
|
|
pmcstat_image_add_symbols(struct pmcstat_image *image, Elf *e,
|
|
Elf_Scn *scn, GElf_Shdr *sh)
|
|
{
|
|
int firsttime;
|
|
size_t n, newsyms, nshsyms, nfuncsyms;
|
|
struct pmcstat_symbol *symptr;
|
|
char *fnname;
|
|
GElf_Sym sym;
|
|
Elf_Data *data;
|
|
|
|
if ((data = elf_getdata(scn, NULL)) == NULL)
|
|
return;
|
|
|
|
/*
|
|
* Determine the number of functions named in this
|
|
* section.
|
|
*/
|
|
|
|
nshsyms = sh->sh_size / sh->sh_entsize;
|
|
for (n = nfuncsyms = 0; n < nshsyms; n++) {
|
|
if (gelf_getsym(data, (int) n, &sym) != &sym)
|
|
return;
|
|
if (GELF_ST_TYPE(sym.st_info) == STT_FUNC)
|
|
nfuncsyms++;
|
|
}
|
|
|
|
if (nfuncsyms == 0)
|
|
return;
|
|
|
|
/*
|
|
* Allocate space for the new entries.
|
|
*/
|
|
firsttime = image->pi_symbols == NULL;
|
|
symptr = reallocarray(image->pi_symbols,
|
|
image->pi_symcount + nfuncsyms, sizeof(*symptr));
|
|
if (symptr == image->pi_symbols) /* realloc() failed. */
|
|
return;
|
|
image->pi_symbols = symptr;
|
|
|
|
/*
|
|
* Append new symbols to the end of the current table.
|
|
*/
|
|
symptr += image->pi_symcount;
|
|
|
|
for (n = newsyms = 0; n < nshsyms; n++) {
|
|
if (gelf_getsym(data, (int) n, &sym) != &sym)
|
|
return;
|
|
if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
|
|
continue;
|
|
|
|
if (sym.st_shndx == STN_UNDEF)
|
|
continue;
|
|
|
|
if (!firsttime && pmcstat_symbol_search(image, sym.st_value))
|
|
continue; /* We've seen this symbol already. */
|
|
|
|
if ((fnname = elf_strptr(e, sh->sh_link, sym.st_name))
|
|
== NULL)
|
|
continue;
|
|
#ifdef __arm__
|
|
/* Remove spurious ARM function name. */
|
|
if (fnname[0] == '$' &&
|
|
(fnname[1] == 'a' || fnname[1] == 't' ||
|
|
fnname[1] == 'd') &&
|
|
fnname[2] == '\0')
|
|
continue;
|
|
#endif
|
|
|
|
symptr->ps_name = pmcstat_string_intern(fnname);
|
|
symptr->ps_start = sym.st_value - image->pi_vaddr;
|
|
symptr->ps_end = symptr->ps_start + sym.st_size;
|
|
|
|
symptr++;
|
|
newsyms++;
|
|
}
|
|
|
|
image->pi_symcount += newsyms;
|
|
if (image->pi_symcount == 0)
|
|
return;
|
|
|
|
assert(newsyms <= nfuncsyms);
|
|
|
|
/*
|
|
* Return space to the system if there were duplicates.
|
|
*/
|
|
if (newsyms < nfuncsyms)
|
|
image->pi_symbols = reallocarray(image->pi_symbols,
|
|
image->pi_symcount, sizeof(*symptr));
|
|
|
|
/*
|
|
* Keep the list of symbols sorted.
|
|
*/
|
|
qsort(image->pi_symbols, image->pi_symcount, sizeof(*symptr),
|
|
pmcstat_symbol_compare);
|
|
|
|
/*
|
|
* Deal with function symbols that have a size of 'zero' by
|
|
* making them extend to the next higher address. These
|
|
* symbols are usually defined in assembly code.
|
|
*/
|
|
for (symptr = image->pi_symbols;
|
|
symptr < image->pi_symbols + (image->pi_symcount - 1);
|
|
symptr++)
|
|
if (symptr->ps_start == symptr->ps_end)
|
|
symptr->ps_end = (symptr+1)->ps_start;
|
|
}
|
|
|
|
/*
|
|
* Record the fact that PC values from 'start' to 'end' come from
|
|
* image 'image'.
|
|
*/
|
|
|
|
void
|
|
pmcstat_image_link(struct pmcstat_process *pp, struct pmcstat_image *image,
|
|
uintfptr_t start)
|
|
{
|
|
struct pmcstat_pcmap *pcm, *pcmnew;
|
|
uintfptr_t offset;
|
|
#ifdef __powerpc__
|
|
unsigned long kernbase;
|
|
size_t kernbase_len;
|
|
#endif
|
|
|
|
assert(image->pi_type != PMCSTAT_IMAGE_UNKNOWN &&
|
|
image->pi_type != PMCSTAT_IMAGE_INDETERMINABLE);
|
|
|
|
if ((pcmnew = malloc(sizeof(*pcmnew))) == NULL)
|
|
err(EX_OSERR, "ERROR: Cannot create a map entry");
|
|
|
|
/*
|
|
* PowerPC kernel is of DYN type and it has a base address
|
|
* where it is initially loaded, before being relocated.
|
|
* As the address in 'start' is where the kernel was relocated to,
|
|
* but the symbols always use the original base address, we need to
|
|
* subtract it to get the correct offset.
|
|
*/
|
|
#ifdef __powerpc__
|
|
if (pp->pp_pid == -1) {
|
|
kernbase = 0;
|
|
kernbase_len = sizeof(kernbase);
|
|
if (sysctlbyname("kern.base_address", &kernbase, &kernbase_len,
|
|
NULL, 0) == -1)
|
|
warnx(
|
|
"WARNING: Could not retrieve kernel base address");
|
|
else
|
|
start -= kernbase;
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* Adjust the map entry to only cover the text portion
|
|
* of the object.
|
|
*/
|
|
|
|
offset = start - image->pi_vaddr;
|
|
pcmnew->ppm_lowpc = image->pi_start + offset;
|
|
pcmnew->ppm_highpc = image->pi_end + offset;
|
|
pcmnew->ppm_image = image;
|
|
|
|
assert(pcmnew->ppm_lowpc < pcmnew->ppm_highpc);
|
|
|
|
/* Overlapped mmap()'s are assumed to never occur. */
|
|
TAILQ_FOREACH(pcm, &pp->pp_map, ppm_next)
|
|
if (pcm->ppm_lowpc >= pcmnew->ppm_highpc)
|
|
break;
|
|
|
|
if (pcm == NULL)
|
|
TAILQ_INSERT_TAIL(&pp->pp_map, pcmnew, ppm_next);
|
|
else
|
|
TAILQ_INSERT_BEFORE(pcm, pcmnew, ppm_next);
|
|
}
|
|
|
|
/*
|
|
* Determine whether a given executable image is an A.OUT object, and
|
|
* if so, fill in its parameters from the text file.
|
|
* Sets image->pi_type.
|
|
*/
|
|
|
|
void
|
|
pmcstat_image_get_aout_params(struct pmcstat_image *image,
|
|
struct pmcstat_args *args)
|
|
{
|
|
int fd;
|
|
ssize_t nbytes;
|
|
struct exec ex;
|
|
const char *path;
|
|
char buffer[PATH_MAX];
|
|
|
|
path = pmcstat_string_unintern(image->pi_execpath);
|
|
assert(path != NULL);
|
|
|
|
if (image->pi_iskernelmodule)
|
|
errx(EX_SOFTWARE,
|
|
"ERROR: a.out kernel modules are unsupported \"%s\"", path);
|
|
|
|
(void) snprintf(buffer, sizeof(buffer), "%s%s",
|
|
args->pa_fsroot, path);
|
|
|
|
if ((fd = open(buffer, O_RDONLY, 0)) < 0 ||
|
|
(nbytes = read(fd, &ex, sizeof(ex))) < 0) {
|
|
if (args->pa_verbosity >= 2)
|
|
warn("WARNING: Cannot determine type of \"%s\"",
|
|
path);
|
|
image->pi_type = PMCSTAT_IMAGE_INDETERMINABLE;
|
|
if (fd != -1)
|
|
(void) close(fd);
|
|
return;
|
|
}
|
|
|
|
(void) close(fd);
|
|
|
|
if ((unsigned) nbytes != sizeof(ex) ||
|
|
N_BADMAG(ex))
|
|
return;
|
|
|
|
image->pi_type = PMCSTAT_IMAGE_AOUT;
|
|
|
|
/* TODO: the rest of a.out processing */
|
|
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Examine an ELF file to determine the size of its text segment.
|
|
* Sets image->pi_type if anything conclusive can be determined about
|
|
* this image.
|
|
*/
|
|
|
|
void
|
|
pmcstat_image_get_elf_params(struct pmcstat_image *image,
|
|
struct pmcstat_args *args)
|
|
{
|
|
int fd;
|
|
size_t i, nph, nsh;
|
|
const char *path, *elfbase;
|
|
char *p, *endp;
|
|
bool first_exec_segment;
|
|
uintfptr_t minva, maxva;
|
|
Elf *e;
|
|
Elf_Scn *scn;
|
|
GElf_Ehdr eh;
|
|
GElf_Phdr ph;
|
|
GElf_Shdr sh;
|
|
enum pmcstat_image_type image_type;
|
|
char buffer[PATH_MAX];
|
|
char buffer_modules[PATH_MAX];
|
|
|
|
assert(image->pi_type == PMCSTAT_IMAGE_UNKNOWN);
|
|
|
|
image->pi_start = minva = ~(uintfptr_t) 0;
|
|
image->pi_end = maxva = (uintfptr_t) 0;
|
|
image->pi_type = image_type = PMCSTAT_IMAGE_INDETERMINABLE;
|
|
image->pi_isdynamic = 0;
|
|
image->pi_dynlinkerpath = NULL;
|
|
image->pi_vaddr = 0;
|
|
|
|
path = pmcstat_string_unintern(image->pi_execpath);
|
|
assert(path != NULL);
|
|
|
|
/*
|
|
* Look for kernel modules under FSROOT/KERNELPATH/NAME and
|
|
* FSROOT/boot/modules/NAME, and user mode executable objects
|
|
* under FSROOT/PATHNAME.
|
|
*/
|
|
if (image->pi_iskernelmodule) {
|
|
(void) snprintf(buffer, sizeof(buffer), "%s%s/%s",
|
|
args->pa_fsroot, args->pa_kernel, path);
|
|
(void) snprintf(buffer_modules, sizeof(buffer_modules),
|
|
"%s/boot/modules/%s", args->pa_fsroot, path);
|
|
} else {
|
|
(void) snprintf(buffer, sizeof(buffer), "%s%s",
|
|
args->pa_fsroot, path);
|
|
}
|
|
|
|
e = NULL;
|
|
fd = open(buffer, O_RDONLY, 0);
|
|
if (fd < 0 && !image->pi_iskernelmodule) {
|
|
warnx("WARNING: Cannot open \"%s\".",
|
|
buffer);
|
|
goto done;
|
|
}
|
|
if (fd < 0 && (fd = open(buffer_modules, O_RDONLY, 0)) < 0) {
|
|
warnx("WARNING: Cannot open \"%s\" or \"%s\".",
|
|
buffer, buffer_modules);
|
|
goto done;
|
|
}
|
|
if (elf_version(EV_CURRENT) == EV_NONE) {
|
|
warnx("WARNING: failed to init elf\n");
|
|
goto done;
|
|
}
|
|
|
|
if ((e = elf_begin(fd, ELF_C_READ, NULL)) == NULL) {
|
|
warnx("WARNING: Cannot read \"%s\".",
|
|
buffer);
|
|
goto done;
|
|
}
|
|
|
|
if (elf_kind(e) != ELF_K_ELF) {
|
|
if (args->pa_verbosity >= 2)
|
|
warnx("WARNING: Cannot determine the type of \"%s\".",
|
|
buffer);
|
|
goto done;
|
|
}
|
|
|
|
if (gelf_getehdr(e, &eh) != &eh) {
|
|
warnx(
|
|
"WARNING: Cannot retrieve the ELF Header for \"%s\": %s.",
|
|
buffer, elf_errmsg(-1));
|
|
goto done;
|
|
}
|
|
|
|
if (eh.e_type != ET_EXEC && eh.e_type != ET_DYN &&
|
|
!(image->pi_iskernelmodule && eh.e_type == ET_REL)) {
|
|
warnx("WARNING: \"%s\" is of an unsupported ELF type.",
|
|
buffer);
|
|
goto done;
|
|
}
|
|
|
|
image_type = eh.e_ident[EI_CLASS] == ELFCLASS32 ?
|
|
PMCSTAT_IMAGE_ELF32 : PMCSTAT_IMAGE_ELF64;
|
|
|
|
/*
|
|
* Determine the virtual address where an executable would be
|
|
* loaded. Additionally, for dynamically linked executables,
|
|
* save the pathname to the runtime linker.
|
|
*/
|
|
if (eh.e_type != ET_REL) {
|
|
if (elf_getphnum(e, &nph) == 0) {
|
|
warnx(
|
|
"WARNING: Could not determine the number of program headers in \"%s\": %s.",
|
|
buffer,
|
|
elf_errmsg(-1));
|
|
goto done;
|
|
}
|
|
first_exec_segment = true;
|
|
for (i = 0; i < eh.e_phnum; i++) {
|
|
if (gelf_getphdr(e, i, &ph) != &ph) {
|
|
warnx(
|
|
"WARNING: Retrieval of PHDR entry #%ju in \"%s\" failed: %s.",
|
|
(uintmax_t) i, buffer, elf_errmsg(-1));
|
|
goto done;
|
|
}
|
|
switch (ph.p_type) {
|
|
case PT_DYNAMIC:
|
|
image->pi_isdynamic = 1;
|
|
break;
|
|
case PT_INTERP:
|
|
if ((elfbase = elf_rawfile(e, NULL)) == NULL) {
|
|
warnx(
|
|
"WARNING: Cannot retrieve the interpreter for \"%s\": %s.",
|
|
buffer, elf_errmsg(-1));
|
|
goto done;
|
|
}
|
|
image->pi_dynlinkerpath =
|
|
pmcstat_string_intern(elfbase +
|
|
ph.p_offset);
|
|
break;
|
|
case PT_LOAD:
|
|
if ((ph.p_flags & PF_X) != 0 &&
|
|
first_exec_segment) {
|
|
image->pi_vaddr = ph.p_vaddr & (-ph.p_align);
|
|
first_exec_segment = false;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Get the min and max VA associated with this ELF object.
|
|
*/
|
|
if (elf_getshnum(e, &nsh) == 0) {
|
|
warnx(
|
|
"WARNING: Could not determine the number of sections for \"%s\": %s.",
|
|
buffer, elf_errmsg(-1));
|
|
goto done;
|
|
}
|
|
|
|
for (i = 0; i < nsh; i++) {
|
|
if ((scn = elf_getscn(e, i)) == NULL ||
|
|
gelf_getshdr(scn, &sh) != &sh) {
|
|
warnx(
|
|
"WARNING: Could not retrieve section header #%ju in \"%s\": %s.",
|
|
(uintmax_t) i, buffer, elf_errmsg(-1));
|
|
goto done;
|
|
}
|
|
if (sh.sh_flags & SHF_EXECINSTR) {
|
|
minva = min(minva, sh.sh_addr);
|
|
maxva = max(maxva, sh.sh_addr + sh.sh_size);
|
|
}
|
|
if (sh.sh_type == SHT_SYMTAB || sh.sh_type == SHT_DYNSYM)
|
|
pmcstat_image_add_symbols(image, e, scn, &sh);
|
|
}
|
|
|
|
image->pi_start = minva;
|
|
image->pi_end = maxva;
|
|
image->pi_type = image_type;
|
|
image->pi_fullpath = pmcstat_string_intern(buffer);
|
|
|
|
/* Build display name
|
|
*/
|
|
endp = buffer;
|
|
for (p = buffer; *p; p++)
|
|
if (*p == '/')
|
|
endp = p+1;
|
|
image->pi_name = pmcstat_string_intern(endp);
|
|
|
|
done:
|
|
(void) elf_end(e);
|
|
if (fd >= 0)
|
|
(void) close(fd);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Given an image descriptor, determine whether it is an ELF, or AOUT.
|
|
* If no handler claims the image, set its type to 'INDETERMINABLE'.
|
|
*/
|
|
|
|
void
|
|
pmcstat_image_determine_type(struct pmcstat_image *image,
|
|
struct pmcstat_args *args)
|
|
{
|
|
assert(image->pi_type == PMCSTAT_IMAGE_UNKNOWN);
|
|
|
|
/* Try each kind of handler in turn */
|
|
if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
|
|
pmcstat_image_get_elf_params(image, args);
|
|
if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
|
|
pmcstat_image_get_aout_params(image, args);
|
|
|
|
/*
|
|
* Otherwise, remember that we tried to determine
|
|
* the object's type and had failed.
|
|
*/
|
|
if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
|
|
image->pi_type = PMCSTAT_IMAGE_INDETERMINABLE;
|
|
}
|
|
|
|
/*
|
|
* Locate an image descriptor given an interned path, adding a fresh
|
|
* descriptor to the cache if necessary. This function also finds a
|
|
* suitable name for this image's sample file.
|
|
*
|
|
* We defer filling in the file format specific parts of the image
|
|
* structure till the time we actually see a sample that would fall
|
|
* into this image.
|
|
*/
|
|
|
|
struct pmcstat_image *
|
|
pmcstat_image_from_path(pmcstat_interned_string internedpath,
|
|
int iskernelmodule, struct pmcstat_args *args,
|
|
struct pmc_plugins *plugins)
|
|
{
|
|
int hash;
|
|
struct pmcstat_image *pi;
|
|
|
|
hash = pmcstat_string_lookup_hash(internedpath);
|
|
|
|
/* First, look for an existing entry. */
|
|
LIST_FOREACH(pi, &pmcstat_image_hash[hash], pi_next)
|
|
if (pi->pi_execpath == internedpath &&
|
|
pi->pi_iskernelmodule == iskernelmodule)
|
|
return (pi);
|
|
|
|
/*
|
|
* Allocate a new entry and place it at the head of the hash
|
|
* and LRU lists.
|
|
*/
|
|
pi = malloc(sizeof(*pi));
|
|
if (pi == NULL)
|
|
return (NULL);
|
|
|
|
pi->pi_type = PMCSTAT_IMAGE_UNKNOWN;
|
|
pi->pi_execpath = internedpath;
|
|
pi->pi_start = ~0;
|
|
pi->pi_end = 0;
|
|
pi->pi_entry = 0;
|
|
pi->pi_vaddr = 0;
|
|
pi->pi_isdynamic = 0;
|
|
pi->pi_iskernelmodule = iskernelmodule;
|
|
pi->pi_dynlinkerpath = NULL;
|
|
pi->pi_symbols = NULL;
|
|
pi->pi_symcount = 0;
|
|
pi->pi_addr2line = NULL;
|
|
|
|
if (plugins[args->pa_pplugin].pl_initimage != NULL)
|
|
plugins[args->pa_pplugin].pl_initimage(pi);
|
|
if (plugins[args->pa_plugin].pl_initimage != NULL)
|
|
plugins[args->pa_plugin].pl_initimage(pi);
|
|
|
|
LIST_INSERT_HEAD(&pmcstat_image_hash[hash], pi, pi_next);
|
|
|
|
return (pi);
|
|
}
|