Refine symtab sorting in libproc.

Add some rules to more closely match what illumos does when an address
resolves to multiple symbols:
- prefer non-local symbols
- prefer symbols with fewer leading underscores and no leading '$'

Add some regression tests to verify these rules.
This commit is contained in:
Mark Johnston 2017-11-21 16:03:21 +00:00
parent 7c72b10912
commit 1bdc41d252
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=326064
3 changed files with 194 additions and 30 deletions

View File

@ -1,5 +1,5 @@
/*-
* Copyright (c) 2016 Mark Johnston <markj@FreeBSD.org>
* Copyright (c) 2016-2017 Mark Johnston <markj@FreeBSD.org>
* Copyright (c) 2010 The FreeBSD Foundation
* Copyright (c) 2008 John Birrell (jb@freebsd.org)
* All rights reserved.
@ -100,27 +100,59 @@ demangle(const char *symbol, char *buf, size_t len)
strlcpy(buf, symbol, len);
}
static int
symvalcomp(void *thunk, const void *a1, const void *a2)
{
struct symsort_thunk {
Elf *e;
struct symtab *symtab;
};
static int
symvalcmp(void *_thunk, const void *a1, const void *a2)
{
GElf_Sym sym1, sym2;
struct symsort_thunk *thunk;
const char *s1, *s2;
u_int i1, i2;
int ret;
int bind1, bind2;
i1 = *(const u_int *)a1;
i2 = *(const u_int *)a2;
symtab = thunk;
thunk = _thunk;
(void)gelf_getsym(symtab->data, i1, &sym1);
(void)gelf_getsym(symtab->data, i2, &sym2);
if (sym1.st_value < sym2.st_value)
ret = -1;
else if (sym1.st_value == sym2.st_value)
ret = 0;
else
ret = 1;
return (ret);
(void)gelf_getsym(thunk->symtab->data, i1, &sym1);
(void)gelf_getsym(thunk->symtab->data, i2, &sym2);
if (sym1.st_value != sym2.st_value)
return (sym1.st_value < sym2.st_value ? -1 : 1);
/* Prefer non-local symbols. */
bind1 = GELF_ST_BIND(sym1.st_info);
bind2 = GELF_ST_BIND(sym2.st_info);
if (bind1 != bind2) {
if (bind1 == STB_LOCAL && bind2 != STB_LOCAL)
return (-1);
if (bind1 != STB_LOCAL && bind2 == STB_LOCAL)
return (1);
}
s1 = elf_strptr(thunk->e, thunk->symtab->stridx, sym1.st_name);
s2 = elf_strptr(thunk->e, thunk->symtab->stridx, sym2.st_name);
if (s1 != NULL && s2 != NULL) {
/* Prefer symbols without a leading '$'. */
if (*s1 == '$')
return (-1);
if (*s2 == '$')
return (1);
/* Prefer symbols with fewer leading underscores. */
for (; *s1 == '_' && *s2 == '_'; s1++, s2++)
;
if (*s1 == '_')
return (-1);
if (*s2 == '_')
return (1);
}
return (0);
}
static int
@ -128,6 +160,7 @@ load_symtab(Elf *e, struct symtab *symtab, u_long sh_type)
{
GElf_Ehdr ehdr;
GElf_Shdr shdr;
struct symsort_thunk thunk;
Elf_Scn *scn;
u_int nsyms;
@ -155,9 +188,13 @@ load_symtab(Elf *e, struct symtab *symtab, u_long sh_type)
return (-1);
for (u_int i = 0; i < nsyms; i++)
symtab->index[i] = i;
qsort_r(symtab->index, nsyms, sizeof(u_int), symtab, symvalcomp);
symtab->nsyms = nsyms;
symtab->stridx = shdr.sh_link;
thunk.e = e;
thunk.symtab = symtab;
qsort_r(symtab->index, nsyms, sizeof(u_int), &thunk, symvalcmp);
return (0);
}
@ -416,12 +453,16 @@ proc_addr2map(struct proc_handle *p, uintptr_t addr)
* symbol and its name.
*/
static int
lookup_symbol_by_addr(Elf *elf, struct symtab *symtab, uintptr_t addr,
const char **namep, GElf_Sym *sym)
lookup_symbol_by_addr(Elf *e, struct symtab *symtab, uintptr_t addr,
const char **namep, GElf_Sym *symp)
{
GElf_Sym sym;
Elf_Data *data;
const char *s;
int min, max, mid;
u_int i, min, max, mid;
if (symtab->nsyms == 0)
return (ENOENT);
data = symtab->data;
min = 0;
@ -429,21 +470,31 @@ lookup_symbol_by_addr(Elf *elf, struct symtab *symtab, uintptr_t addr,
while (min <= max) {
mid = (max + min) / 2;
(void)gelf_getsym(data, symtab->index[mid], sym);
if (addr >= sym->st_value &&
addr < sym->st_value + sym->st_size) {
s = elf_strptr(elf, symtab->stridx, sym->st_name);
if (s != NULL && namep != NULL)
*namep = s;
return (0);
}
(void)gelf_getsym(data, symtab->index[mid], &sym);
if (addr >= sym.st_value && addr < sym.st_value + sym.st_size)
break;
if (addr < sym->st_value)
if (addr < sym.st_value)
max = mid - 1;
else
min = mid + 1;
}
return (ENOENT);
if (min > max)
return (ENOENT);
/*
* Advance until we find the matching symbol with largest index.
*/
for (i = mid; i < symtab->nsyms; i++) {
(void)gelf_getsym(data, symtab->index[i], &sym);
if (addr < sym.st_value || addr >= sym.st_value + sym.st_size)
break;
}
(void)gelf_getsym(data, symtab->index[i - 1], symp);
s = elf_strptr(e, symtab->stridx, symp->st_name);
if (s != NULL && namep != NULL)
*namep = s;
return (0);
}
int

View File

@ -1,5 +1,5 @@
/*-
* Copyright (c) 2014-2016 Mark Johnston <markj@FreeBSD.org>
* Copyright (c) 2014-2017 Mark Johnston <markj@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -370,6 +370,93 @@ ATF_TC_BODY(signal_forward, tc)
proc_free(phdl);
}
ATF_TC(symbol_sort_local);
ATF_TC_HEAD(symbol_sort_local, tc)
{
atf_tc_set_md_var(tc, "descr",
"Ensure that proc_addr2sym() returns the non-local alias when "
"the address resolves to multiple symbols.");
}
ATF_TC_BODY(symbol_sort_local, tc)
{
char symname[32];
GElf_Sym bar_sym;
struct proc_handle *phdl;
int error;
phdl = start_prog(tc, true);
error = proc_name2sym(phdl, target_prog_file, "bar", &bar_sym, NULL);
ATF_REQUIRE_MSG(error == 0, "failed to look up 'bar' in %s",
target_prog_file);
ATF_REQUIRE(GELF_ST_BIND(bar_sym.st_info) == STB_LOCAL);
error = proc_addr2sym(phdl, bar_sym.st_value, symname, sizeof(symname),
&bar_sym);
ATF_REQUIRE_MSG(error == 0, "failed to resolve 'bar' by addr");
ATF_REQUIRE_MSG(strcmp(symname, "baz") == 0,
"unexpected symbol name '%s'", symname);
ATF_REQUIRE(GELF_ST_BIND(bar_sym.st_info) == STB_GLOBAL);
}
ATF_TC(symbol_sort_prefix);
ATF_TC_HEAD(symbol_sort_prefix, tc)
{
atf_tc_set_md_var(tc, "descr",
"Ensure that proc_addr2sym() returns the alias whose name is not "
"prefixed with '$' if one exists.");
}
ATF_TC_BODY(symbol_sort_prefix, tc)
{
char symname[32];
GElf_Sym qux_sym;
struct proc_handle *phdl;
int error;
phdl = start_prog(tc, true);
error = proc_name2sym(phdl, target_prog_file, "$qux", &qux_sym, NULL);
ATF_REQUIRE_MSG(error == 0, "failed to look up '$qux' in %s",
target_prog_file);
error = proc_addr2sym(phdl, qux_sym.st_value, symname, sizeof(symname),
&qux_sym);
ATF_REQUIRE_MSG(error == 0, "failed to resolve 'qux' by addr");
ATF_REQUIRE_MSG(strcmp(symname, "qux") == 0,
"unexpected symbol name '%s'", symname);
}
ATF_TC(symbol_sort_underscore);
ATF_TC_HEAD(symbol_sort_underscore, tc)
{
atf_tc_set_md_var(tc, "descr",
"Ensure that proc_addr2sym() returns the alias with fewest leading "
"underscores in the name when the address resolves to multiple "
"symbols.");
}
ATF_TC_BODY(symbol_sort_underscore, tc)
{
char symname[32];
GElf_Sym foo_sym;
struct proc_handle *phdl;
int error;
phdl = start_prog(tc, true);
error = proc_name2sym(phdl, target_prog_file, "foo", &foo_sym, NULL);
ATF_REQUIRE_MSG(error == 0, "failed to look up 'foo' in %s",
target_prog_file);
error = proc_addr2sym(phdl, foo_sym.st_value, symname, sizeof(symname),
&foo_sym);
ATF_REQUIRE_MSG(error == 0, "failed to resolve 'foo' by addr");
ATF_REQUIRE_MSG(strcmp(symname, "foo") == 0,
"unexpected symbol name '%s'", symname);
}
ATF_TP_ADD_TCS(tp)
{
@ -379,6 +466,9 @@ ATF_TP_ADD_TCS(tp)
ATF_TP_ADD_TC(tp, symbol_lookup);
ATF_TP_ADD_TC(tp, symbol_lookup_fail);
ATF_TP_ADD_TC(tp, signal_forward);
ATF_TP_ADD_TC(tp, symbol_sort_local);
ATF_TP_ADD_TC(tp, symbol_sort_prefix);
ATF_TP_ADD_TC(tp, symbol_sort_underscore);
return (atf_no_error());
}

View File

@ -42,10 +42,33 @@ usr1(int sig __unused)
saw = 1;
}
void foo(void);
void qux(void);
void
foo(void)
{
}
__weak_reference(foo, _foo);
static void
bar(void)
{
}
__strong_reference(bar, baz);
void
qux(void)
{
}
__strong_reference(qux, $qux);
int
main(int argc, char **argv)
{
bar(); /* force the symbol to be emitted */
if (argc == 1)
return (EXIT_SUCCESS);
if (argc == 2 && strcmp(argv[1], "-s") == 0) {