From aef199e56393cbe7df59c27733d41c6add7b560b Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Sun, 9 Feb 2020 12:22:43 +0000 Subject: [PATCH] Use sigfastblock(2) in rtld. This allows for rtld to not issue two sigprocmask(2) syscalls for each symbol binding operation in single-threaded processes. Rtld needs to block signals as part of locking to ensure signal safety of the bind process, because signal handlers might need to lazily resolve symbol references. As a result, the number of syscalls issued on startup by simple programs not using libthr is typically reduced 2x. For instance, for hello world, I see: non-sigfastblock # (truss ./hello > /dev/null) |& wc -l 63 sigfastblock # (truss ./hello > /dev/null) |& wc -l 37 Tested by: pho Discussed with: cem, emaste, jilles Sponsored by: The FreeBSD Foundation Differential revision: https://reviews.freebsd.org/D12773 --- libexec/rtld-elf/rtld-libc/Makefile.inc | 5 +- libexec/rtld-elf/rtld.c | 5 ++ libexec/rtld-elf/rtld.h | 1 + libexec/rtld-elf/rtld_lock.c | 102 +++++++++++++++--------- 4 files changed, 72 insertions(+), 41 deletions(-) diff --git a/libexec/rtld-elf/rtld-libc/Makefile.inc b/libexec/rtld-elf/rtld-libc/Makefile.inc index dc3a1e47da3c..90e9f97251a3 100644 --- a/libexec/rtld-elf/rtld-libc/Makefile.inc +++ b/libexec/rtld-elf/rtld-libc/Makefile.inc @@ -45,8 +45,9 @@ _libc_string_objects= bcmp bcopy bzero memset memchr memcmp memcpy memmove \ strlen strncmp strncpy strrchr strsep strspn strstr strtok # Also use all the syscall .o files from libc_nossp_pic: _libc_other_objects= sigsetjmp lstat stat fstat fstatat fstatfs syscall \ - cerror geteuid getegid munmap mprotect sysarch __sysctl issetugid __getcwd \ - utrace thr_self thr_kill pread mmap lseek _exit _fstat _fstatat _fstatfs \ + cerror geteuid getegid sigfastblock munmap mprotect \ + sysarch __sysctl issetugid __getcwd utrace \ + thr_self thr_kill pread mmap lseek _exit _fstat _fstatat _fstatfs \ getdirentries _getdirentries _close _fcntl _open _openat 
_read \ _sigprocmask _write readlink _setjmp setjmp setjmperr diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c index 3e87e53a3158..4d3b32381cbd 100644 --- a/libexec/rtld-elf/rtld.c +++ b/libexec/rtld-elf/rtld.c @@ -286,6 +286,7 @@ Elf_Addr tls_dtv_generation = 1; /* Used to detect when dtv size changes */ int tls_max_index = 1; /* Largest module index allocated */ static bool ld_library_path_rpath = false; +bool ld_fast_sigblock = false; /* * Globals for path names, and such @@ -444,6 +445,10 @@ _rtld(Elf_Addr *sp, func_ptr_type *exit_proc, Obj_Entry **objp) main_argc = argc; main_argv = argv; + if (aux_info[AT_BSDFLAGS] != NULL && + (aux_info[AT_BSDFLAGS]->a_un.a_val & ELF_BSDF_SIGFASTBLK) != 0) + ld_fast_sigblock = true; + trust = !issetugid(); md_abi_variant_hook(aux_info); diff --git a/libexec/rtld-elf/rtld.h b/libexec/rtld-elf/rtld.h index c1996f04219f..f21b1d79cf20 100644 --- a/libexec/rtld-elf/rtld.h +++ b/libexec/rtld-elf/rtld.h @@ -365,6 +365,7 @@ void free_aligned(void *ptr); extern Elf_Addr _GLOBAL_OFFSET_TABLE_[]; extern Elf_Sym sym_zero; /* For resolving undefined weak refs. 
*/ extern bool ld_bind_not; +extern bool ld_fast_sigblock; void dump_relocations(Obj_Entry *); void dump_obj_relocations(Obj_Entry *); diff --git a/libexec/rtld-elf/rtld_lock.c b/libexec/rtld-elf/rtld_lock.c index 77f8f5d747d9..c453584b96e2 100644 --- a/libexec/rtld-elf/rtld_lock.c +++ b/libexec/rtld-elf/rtld_lock.c @@ -45,6 +45,7 @@ */ #include +#include #include #include #include @@ -68,6 +69,7 @@ typedef struct Struct_Lock { static sigset_t fullsigmask, oldsigmask; static int thread_flag, wnested; +static uint32_t fsigblock; static void * def_lock_create(void) @@ -117,6 +119,17 @@ def_rlock_acquire(void *lock) ; /* Spin */ } +static void +sig_fastunblock(void) +{ + uint32_t oldval; + + assert((fsigblock & ~SIGFASTBLOCK_FLAGS) >= SIGFASTBLOCK_INC); + oldval = atomic_fetchadd_32(&fsigblock, -SIGFASTBLOCK_INC); + if (oldval == (SIGFASTBLOCK_PEND | SIGFASTBLOCK_INC)) + __sys_sigfastblock(SIGFASTBLOCK_UNBLOCK, NULL); +} + static void def_wlock_acquire(void *lock) { @@ -124,14 +137,23 @@ def_wlock_acquire(void *lock) sigset_t tmp_oldsigmask; l = (Lock *)lock; - for (;;) { - sigprocmask(SIG_BLOCK, &fullsigmask, &tmp_oldsigmask); - if (atomic_cmpset_acq_int(&l->lock, 0, WAFLAG)) - break; - sigprocmask(SIG_SETMASK, &tmp_oldsigmask, NULL); + if (ld_fast_sigblock) { + for (;;) { + atomic_add_32(&fsigblock, SIGFASTBLOCK_INC); + if (atomic_cmpset_acq_int(&l->lock, 0, WAFLAG)) + break; + sig_fastunblock(); + } + } else { + for (;;) { + sigprocmask(SIG_BLOCK, &fullsigmask, &tmp_oldsigmask); + if (atomic_cmpset_acq_int(&l->lock, 0, WAFLAG)) + break; + sigprocmask(SIG_SETMASK, &tmp_oldsigmask, NULL); + } + if (atomic_fetchadd_int(&wnested, 1) == 0) + oldsigmask = tmp_oldsigmask; } - if (atomic_fetchadd_int(&wnested, 1) == 0) - oldsigmask = tmp_oldsigmask; } static void @@ -143,9 +165,10 @@ def_lock_release(void *lock) if ((l->lock & WAFLAG) == 0) atomic_add_rel_int(&l->lock, -RC_INCR); else { - assert(wnested > 0); atomic_add_rel_int(&l->lock, -WAFLAG); - if 
(atomic_fetchadd_int(&wnested, -1) == 1) + if (ld_fast_sigblock) + sig_fastunblock(); + else if (atomic_fetchadd_int(&wnested, -1) == 1) sigprocmask(SIG_SETMASK, &oldsigmask, NULL); } } @@ -279,38 +302,36 @@ lock_restart_for_upgrade(RtldLockState *lockstate) void lockdflt_init(void) { - int i; + int i; - deflockinfo.rtli_version = RTLI_VERSION; - deflockinfo.lock_create = def_lock_create; - deflockinfo.lock_destroy = def_lock_destroy; - deflockinfo.rlock_acquire = def_rlock_acquire; - deflockinfo.wlock_acquire = def_wlock_acquire; - deflockinfo.lock_release = def_lock_release; - deflockinfo.thread_set_flag = def_thread_set_flag; - deflockinfo.thread_clr_flag = def_thread_clr_flag; - deflockinfo.at_fork = NULL; + deflockinfo.rtli_version = RTLI_VERSION; + deflockinfo.lock_create = def_lock_create; + deflockinfo.lock_destroy = def_lock_destroy; + deflockinfo.rlock_acquire = def_rlock_acquire; + deflockinfo.wlock_acquire = def_wlock_acquire; + deflockinfo.lock_release = def_lock_release; + deflockinfo.thread_set_flag = def_thread_set_flag; + deflockinfo.thread_clr_flag = def_thread_clr_flag; + deflockinfo.at_fork = NULL; - for (i = 0; i < RTLD_LOCK_CNT; i++) { - rtld_locks[i].mask = (1 << i); - rtld_locks[i].handle = NULL; - } + for (i = 0; i < RTLD_LOCK_CNT; i++) { + rtld_locks[i].mask = (1 << i); + rtld_locks[i].handle = NULL; + } - memcpy(&lockinfo, &deflockinfo, sizeof(lockinfo)); - _rtld_thread_init(NULL); - /* - * Construct a mask to block all signals except traps which might - * conceivably be generated within the dynamic linker itself. 
- */ - sigfillset(&fullsigmask); - sigdelset(&fullsigmask, SIGILL); - sigdelset(&fullsigmask, SIGTRAP); - sigdelset(&fullsigmask, SIGABRT); - sigdelset(&fullsigmask, SIGEMT); - sigdelset(&fullsigmask, SIGFPE); - sigdelset(&fullsigmask, SIGBUS); - sigdelset(&fullsigmask, SIGSEGV); - sigdelset(&fullsigmask, SIGSYS); + memcpy(&lockinfo, &deflockinfo, sizeof(lockinfo)); + _rtld_thread_init(NULL); + if (ld_fast_sigblock) { + __sys_sigfastblock(SIGFASTBLOCK_SETPTR, &fsigblock); + } else { + /* + * Construct a mask to block all signals. Note that + * blocked traps mean that the process is terminated + * if trap occurs while we are in locked section, with + * the default settings for kern.forcesigexit. + */ + sigfillset(&fullsigmask); + } } /* @@ -331,7 +352,10 @@ _rtld_thread_init(struct RtldLockInfo *pli) if (pli == NULL) pli = &deflockinfo; - + else if (ld_fast_sigblock) { + fsigblock = 0; + __sys_sigfastblock(SIGFASTBLOCK_UNSETPTR, NULL); + } for (i = 0; i < RTLD_LOCK_CNT; i++) if ((locks[i] = pli->lock_create()) == NULL)