From 6d5d786f800c42ac9eec4ba9ac7202bd184c7242 Mon Sep 17 00:00:00 2001
From: Alexander Kabaev
Date: Thu, 29 May 2003 22:58:26 +0000
Subject: [PATCH] Allow threading libraries to register their own locking
 implementation in case the default one provided by rtld is not suitable.
 Consolidate the various identical MD lock implementations into a single
 file using the appropriate machine/atomic.h.

Approved by: re (scottl)
---
 libexec/rtld-elf/Makefile               |   5 +-
 libexec/rtld-elf/alpha/rtld_machdep.h   |  18 +-
 libexec/rtld-elf/alpha/rtld_start.S     |  49 ----
 libexec/rtld-elf/amd64/rtld_machdep.h   |  26 +-
 libexec/rtld-elf/i386/lockdflt.c        | 132 ----------
 libexec/rtld-elf/i386/rtld_machdep.h    |  26 +-
 libexec/rtld-elf/ia64/reloc.c           |  14 +-
 libexec/rtld-elf/ia64/rtld_machdep.h    |  11 +-
 libexec/rtld-elf/ia64/rtld_start.S      |  96 ++-----
 libexec/rtld-elf/libmap.c               |   9 +-
 libexec/rtld-elf/powerpc/reloc.c        |  52 ++--
 libexec/rtld-elf/powerpc/rtld_machdep.h |   6 +-
 libexec/rtld-elf/powerpc/rtld_start.S   |  12 +-
 libexec/rtld-elf/rtld.c                 | 124 +++------
 libexec/rtld-elf/rtld.h                 |   3 +-
 libexec/rtld-elf/rtld_lock.c            | 336 ++++++++++++++++++++++++
 libexec/rtld-elf/rtld_lock.h            |  63 +++++
 libexec/rtld-elf/sparc64/reloc.c        |  34 +--
 libexec/rtld-elf/sparc64/rtld_machdep.h |  10 -
 libexec/rtld-elf/sparc64/rtld_start.S   |  12 +-
 20 files changed, 558 insertions(+), 480 deletions(-)
 create mode 100644 libexec/rtld-elf/rtld_lock.c
 create mode 100644 libexec/rtld-elf/rtld_lock.h

diff --git a/libexec/rtld-elf/Makefile b/libexec/rtld-elf/Makefile
index ff03080411cb..0813b6e44e6c 100644
--- a/libexec/rtld-elf/Makefile
+++ b/libexec/rtld-elf/Makefile
@@ -1,10 +1,11 @@
 # $FreeBSD$
 
 PROG=	ld-elf.so.1
-SRCS=	rtld_start.S rtld.c lockdflt.c map_object.c malloc.c \
+SRCS=	rtld_start.S rtld.c rtld_lock.c map_object.c malloc.c \
 	xmalloc.c debug.c reloc.c
 MAN=	rtld.1
-CFLAGS+=	-Wall -DFREEBSD_ELF -I${.CURDIR}/${MACHINE_ARCH} -I${.CURDIR}
+CFLAGS+=	-Wall -DFREEBSD_ELF -DIN_RTLD
+CFLAGS+=	-I${.CURDIR}/${MACHINE_ARCH} -I${.CURDIR}
 LDFLAGS+=	-nostdlib -e .rtld_start
 INSTALLFLAGS=	-fschg -C -b
 MLINKS=	rtld.1 ld-elf.so.1.1 \
diff --git a/libexec/rtld-elf/alpha/rtld_machdep.h b/libexec/rtld-elf/alpha/rtld_machdep.h
index d224b9c84068..008abcef0fe1 100644
--- a/libexec/rtld-elf/alpha/rtld_machdep.h
+++ b/libexec/rtld-elf/alpha/rtld_machdep.h
@@ -29,6 +29,18 @@
 #ifndef RTLD_MACHDEP_H
 #define RTLD_MACHDEP_H 1
 
+#include <sys/types.h>
+#include <machine/atomic.h>
+
+/*
+ * This value of CACHE_LINE_SIZE is conservative. The actual size
+ * is 32 on the 21064, 21064A, 21066, 21066A, and 21164. It is 64
+ * on the 21264. Compaq recommends sequestering each lock in its own
+ * 128-byte block to allow for future implementations with larger
+ * cache lines.
+ */
+#define CACHE_LINE_SIZE	128
+
 struct Struct_Obj_Entry;
 
 /* Return the address of the .dynamic section in the dynamic linker. */
@@ -48,10 +60,4 @@ Elf_Addr reloc_jmpslot(Elf_Addr *, Elf_Addr,
 /* Lazy binding entry point, called via PLT. */
 void _rtld_bind_start_old(void);
 
-/* Atomic operations.
*/ -int cmp0_and_store_int(volatile int *, int); -void atomic_add_int(volatile int *, int); -void atomic_incr_int(volatile int *); -void atomic_decr_int(volatile int *); - #endif diff --git a/libexec/rtld-elf/alpha/rtld_start.S b/libexec/rtld-elf/alpha/rtld_start.S index d21c0884873f..8ab7605a5817 100644 --- a/libexec/rtld-elf/alpha/rtld_start.S +++ b/libexec/rtld-elf/alpha/rtld_start.S @@ -209,52 +209,3 @@ NESTED_NOPROFILE(_rtld_bind_start_old, 0, 168, ra, 0, 0) END(_rtld_bind_start_old) -/* - * int cmp0_and_store_int(volatile int *p, int newval); - * - * If an int holds 0, store newval into it; else do nothing. Returns - * the previous value. - */ -LEAF(cmp0_and_store_int, 2) -1: mov a1, t0 - ldl_l v0, 0(a0) - bne v0, 3f - stl_c t0, 0(a0) - beq t0, 2f - mb - RET -2: br 1b -3: RET -END(cmp0_and_store_int) - -LEAF(atomic_add_int, 2) -0: ldl_l t0, 0(a0) - addq t0, a1, t0 - stl_c t0, 0(a0) - beq t0, 1f - mb - RET -1: br 0b -END(atomic_add_int) - -/* Atomically increment an int. */ -LEAF(atomic_incr_int, 1) -0: ldl_l t0, 0(a0) - addq t0, 1, t0 - stl_c t0, 0(a0) - beq t0, 1f - mb - RET -1: br 0b -END(atomic_incr_int) - -/* Atomically decrement an int. */ -LEAF(atomic_decr_int, 1) -0: ldl_l t0, 0(a0) - subq t0, 1, t0 - stl_c t0, 0(a0) - beq t0, 1f - mb - RET -1: br 0b -END(atomic_decr_int) diff --git a/libexec/rtld-elf/amd64/rtld_machdep.h b/libexec/rtld-elf/amd64/rtld_machdep.h index 4bd7ae2a53c6..953e289abd9b 100644 --- a/libexec/rtld-elf/amd64/rtld_machdep.h +++ b/libexec/rtld-elf/amd64/rtld_machdep.h @@ -29,6 +29,11 @@ #ifndef RTLD_MACHDEP_H #define RTLD_MACHDEP_H 1 +#include +#include + +#define CACHE_LINE_SIZE 32 + struct Struct_Obj_Entry; /* Return the address of the .dynamic section in the dynamic linker. */ @@ -53,25 +58,4 @@ reloc_jmpslot(Elf_Addr *where, Elf_Addr target, #define call_initfini_pointer(obj, target) \ (((InitFunc)(target))()) -static inline void -atomic_decr_int(volatile int *p) -{ - __asm __volatile ("lock; decl %0" : "+m"(*p) : : "cc"); -} - -static inline void -atomic_incr_int(volatile int *p) -{ - __asm __volatile ("lock; incl %0" : "+m"(*p) : : "cc"); -} - -static inline void -atomic_add_int(volatile int *p, int val) -{ - __asm __volatile ("lock; addl %1, %0" - : "+m"(*p) - : "ri"(val) - : "cc"); -} - #endif diff --git a/libexec/rtld-elf/i386/lockdflt.c b/libexec/rtld-elf/i386/lockdflt.c index 46f8922602b7..42dc7fea1b96 100644 --- a/libexec/rtld-elf/i386/lockdflt.c +++ b/libexec/rtld-elf/i386/lockdflt.c @@ -50,23 +50,6 @@ #include #include -#include -#include - -#include "debug.h" -#include "rtld.h" - -#define CACHE_LINE_SIZE 32 - -#define WAFLAG 0x1 /* A writer holds the lock */ -#define RC_INCR 0x2 /* Adjusts count of readers desiring lock */ - -typedef struct Struct_Lock { - volatile int lock; - void *base; -} Lock; - -static sigset_t fullsigmask, oldsigmask; static inline int cmpxchgl(int old, int new, volatile int *m) @@ -93,44 +76,6 @@ xchgl(int v, volatile int *m) return result; } -static void * -lock_create(void *context) -{ - void *base; - char *p; - uintptr_t r; - Lock *l; - - /* - * Arrange for the lock to occupy its own cache line. First, we - * optimistically allocate just a cache line, hoping that malloc - * will give us a well-aligned block of memory. If that doesn't - * work, we allocate a larger block and take a well-aligned cache - * line from it. 
- */ - base = xmalloc(CACHE_LINE_SIZE); - p = (char *)base; - if ((uintptr_t)p % CACHE_LINE_SIZE != 0) { - free(base); - base = xmalloc(2 * CACHE_LINE_SIZE); - p = (char *)base; - if ((r = (uintptr_t)p % CACHE_LINE_SIZE) != 0) - p += CACHE_LINE_SIZE - r; - } - l = (Lock *)p; - l->base = base; - l->lock = 0; - return l; -} - -static void -lock_destroy(void *lock) -{ - Lock *l = (Lock *)lock; - - free(l->base); -} - /* * Crude exclusive locks for the 80386, which does not support the * cmpxchg instruction. @@ -161,51 +106,6 @@ lock80386_release(void *lock) sigprocmask(SIG_SETMASK, &oldsigmask, NULL); } -/* - * Better reader/writer locks for the 80486 and later CPUs. - */ -static void -rlock_acquire(void *lock) -{ - Lock *l = (Lock *)lock; - - atomic_add_int(&l->lock, RC_INCR); - while (l->lock & WAFLAG) - ; /* Spin */ -} - -static void -wlock_acquire(void *lock) -{ - Lock *l = (Lock *)lock; - sigset_t tmp_oldsigmask; - - for ( ; ; ) { - sigprocmask(SIG_BLOCK, &fullsigmask, &tmp_oldsigmask); - if (cmpxchgl(0, WAFLAG, &l->lock) == 0) - break; - sigprocmask(SIG_SETMASK, &tmp_oldsigmask, NULL); - } - oldsigmask = tmp_oldsigmask; -} - -static void -rlock_release(void *lock) -{ - Lock *l = (Lock *)lock; - - atomic_add_int(&l->lock, -RC_INCR); -} - -static void -wlock_release(void *lock) -{ - Lock *l = (Lock *)lock; - - atomic_add_int(&l->lock, -WAFLAG); - sigprocmask(SIG_SETMASK, &oldsigmask, NULL); -} - /* * Code to determine at runtime whether the CPU supports the cmpxchg * instruction. This instruction allows us to use locks that are more @@ -242,35 +142,3 @@ cpu_supports_cmpxchg(void) return result; } -void -lockdflt_init(LockInfo *li) -{ - li->context = NULL; - li->context_destroy = NULL; - li->lock_create = lock_create; - li->lock_destroy = lock_destroy; - if (cpu_supports_cmpxchg()) { - /* Use fast locks that require an 80486 or later. */ - li->rlock_acquire = rlock_acquire; - li->wlock_acquire = wlock_acquire; - li->rlock_release = rlock_release; - li->wlock_release = wlock_release; - } else { - /* It's a cruddy old 80386. */ - li->rlock_acquire = li->wlock_acquire = lock80386_acquire; - li->rlock_release = li->wlock_release = lock80386_release; - } - /* - * Construct a mask to block all signals except traps which might - * conceivably be generated within the dynamic linker itself. - */ - sigfillset(&fullsigmask); - sigdelset(&fullsigmask, SIGILL); - sigdelset(&fullsigmask, SIGTRAP); - sigdelset(&fullsigmask, SIGABRT); - sigdelset(&fullsigmask, SIGEMT); - sigdelset(&fullsigmask, SIGFPE); - sigdelset(&fullsigmask, SIGBUS); - sigdelset(&fullsigmask, SIGSEGV); - sigdelset(&fullsigmask, SIGSYS); -} diff --git a/libexec/rtld-elf/i386/rtld_machdep.h b/libexec/rtld-elf/i386/rtld_machdep.h index 4bd7ae2a53c6..953e289abd9b 100644 --- a/libexec/rtld-elf/i386/rtld_machdep.h +++ b/libexec/rtld-elf/i386/rtld_machdep.h @@ -29,6 +29,11 @@ #ifndef RTLD_MACHDEP_H #define RTLD_MACHDEP_H 1 +#include +#include + +#define CACHE_LINE_SIZE 32 + struct Struct_Obj_Entry; /* Return the address of the .dynamic section in the dynamic linker. 
*/ @@ -53,25 +58,4 @@ reloc_jmpslot(Elf_Addr *where, Elf_Addr target, #define call_initfini_pointer(obj, target) \ (((InitFunc)(target))()) -static inline void -atomic_decr_int(volatile int *p) -{ - __asm __volatile ("lock; decl %0" : "+m"(*p) : : "cc"); -} - -static inline void -atomic_incr_int(volatile int *p) -{ - __asm __volatile ("lock; incl %0" : "+m"(*p) : : "cc"); -} - -static inline void -atomic_add_int(volatile int *p, int val) -{ - __asm __volatile ("lock; addl %1, %0" - : "+m"(*p) - : "ri"(val) - : "cc"); -} - #endif diff --git a/libexec/rtld-elf/ia64/reloc.c b/libexec/rtld-elf/ia64/reloc.c index 7db282304c51..0c62a87aef80 100644 --- a/libexec/rtld-elf/ia64/reloc.c +++ b/libexec/rtld-elf/ia64/reloc.c @@ -122,8 +122,8 @@ alloc_fptrs(Obj_Entry *obj, bool mapped) } /* - * This assertion is necessary to guarantee function pointer - * uniqueness + * This assertion is necessary to guarantee function pointer + * uniqueness */ assert(fptrs != NULL); @@ -136,12 +136,12 @@ free_fptrs(Obj_Entry *obj, bool mapped) struct fptr **fptrs; size_t fbytes; - fptrs = obj->priv; + fptrs = obj->priv; if (fptrs == NULL) return; fbytes = obj->nchains * sizeof(struct fptr *); - if (mapped) + if (mapped) munmap(fptrs, fbytes); else free(fptrs); @@ -186,7 +186,7 @@ reloc_non_plt_obj(Obj_Entry *obj_rtld, Obj_Entry *obj, const Elf_Rela *rela, /* * We have to make sure that all @fptr references to * the same function are identical so that code can - * compare function pointers. + * compare function pointers. */ const Elf_Sym *def; const Obj_Entry *defobj; @@ -313,8 +313,8 @@ reloc_non_plt(Obj_Entry *obj, Obj_Entry *obj_rtld) if (cache) munmap(cache, bytes); - /* - * Release temporarily mapped fptrs if relocating + /* + * Release temporarily mapped fptrs if relocating * rtld object itself. A new table will be created * in make_function_pointer using malloc when needed. */ diff --git a/libexec/rtld-elf/ia64/rtld_machdep.h b/libexec/rtld-elf/ia64/rtld_machdep.h index a0ea72ff882a..bf1261e87ee2 100644 --- a/libexec/rtld-elf/ia64/rtld_machdep.h +++ b/libexec/rtld-elf/ia64/rtld_machdep.h @@ -29,6 +29,11 @@ #ifndef RTLD_MACHDEP_H #define RTLD_MACHDEP_H 1 +#include +#include + +#define CACHE_LINE_SIZE 128 + /* * Macros for cracking ia64 function pointers. */ @@ -50,10 +55,4 @@ Elf_Addr reloc_jmpslot(Elf_Addr *, Elf_Addr, const struct Struct_Obj_Entry *, void *make_function_pointer(const Elf_Sym *, const struct Struct_Obj_Entry *); void call_initfini_pointer(const struct Struct_Obj_Entry *, Elf_Addr); -/* Atomic operations. 
*/ -int cmp0_and_store_int(volatile int *, int); -void atomic_add_int(volatile int *, int); -void atomic_incr_int(volatile int *); -void atomic_decr_int(volatile int *); - #endif diff --git a/libexec/rtld-elf/ia64/rtld_start.S b/libexec/rtld-elf/ia64/rtld_start.S index b4408c671c83..158520713e50 100644 --- a/libexec/rtld-elf/ia64/rtld_start.S +++ b/libexec/rtld-elf/ia64/rtld_start.S @@ -50,7 +50,7 @@ ENTRY(_rtld_start, 0) ;; sub out0=r14,r15 // out0 is image base address br.call.sptk.many rp=_rtld_reloc // fixup image - + add sp=-16,sp // 16 bytes for us, 16 for _rtld ;; mov out0=in0 @@ -60,7 +60,7 @@ ENTRY(_rtld_start, 0) br.call.sptk.many rp=_rtld // r8=_rtld(sp, &exit_proc, &obj_main) add r16=16,sp // address for exit proc - ;; + ;; ld8 r15=[r16] // read exit proc add sp=16,sp // readjust stack mov b7=r8 // address of real _start @@ -106,12 +106,12 @@ ENTRY(_rtld_bind_start, 0) stf.spill [r17]=f10,32 stf.spill [r18]=f11,32 mov out0=r16 // Obj_Entry for caller - ;; + ;; } { .mmi stf.spill [r17]=f12,32 stf.spill [r18]=f13,32 shladd out1=r15,3,out1 // rela offset = 24 * index - ;; + ;; } { .mmb stf.spill [r17]=f14,32 stf.spill [r18]=f15,32 @@ -125,21 +125,21 @@ ENTRY(_rtld_bind_start, 0) ld8 r1=[r8] // target gp mov ar.pfs=loc0 // clean up mov rp=loc1 -} { .mmi +} { .mmi ldf.fill f8=[r17],32 // restore float arguments ldf.fill f9=[r18],32 mov r8=loc2 // restore structure pointer - ;; + ;; } { .mmi ldf.fill f10=[r17],32 ldf.fill f11=[r18],32 mov r9=loc3 - ;; + ;; } { .mmi ldf.fill f12=[r17],32 ldf.fill f13=[r18],32 mov r10=loc4 - ;; + ;; } { .mmi ldf.fill f14=[r17],32 ldf.fill f15=[r18],32 @@ -157,69 +157,15 @@ ENTRY(_rtld_bind_start, 0) } END(_rtld_bind_start) -/* - * int cmp0_and_store_int(volatile int *p, int newval); - * - * If an int holds 0, store newval into it; else do nothing. Returns - * the previous value. - */ -ENTRY(cmp0_and_store_int, 2) - mov ar.ccv=0 - ;; - cmpxchg4.acq r8=[in0],in1,ar.ccv - br.ret.sptk.many rp -END(cmp0_and_store_int) - -ENTRY(atomic_add_int, 2) -1: ld4 r14=[in0] - ;; - mov ar.ccv=r14 - add r15=in1,r14 - ;; - cmpxchg4.acq r16=[in0],r15,ar.ccv - ;; - cmp.ne p6,p0=r14,r16 -(p6) br.cond.spnt.few 1b - br.ret.sptk.many rp -END(atomic_add_int) - -/* Atomically increment an int. */ -ENTRY(atomic_incr_int, 1) -1: ld4 r14=[in0] - ;; - mov ar.ccv=r14 - add r15=1,r14 - ;; - cmpxchg4.acq r16=[in0],r15,ar.ccv - ;; - cmp.ne p6,p0=r14,r16 -(p6) br.cond.spnt.few 1b - br.ret.sptk.many rp -END(atomic_incr_int) - -/* Atomically decrement an int. */ -ENTRY(atomic_decr_int, 1) -1: ld4 r14=[in0] - ;; - mov ar.ccv=r14 - add r15=-1,r14 - ;; - cmpxchg4.acq r16=[in0],r15,ar.ccv - ;; - cmp.ne p6,p0=r14,r16 -(p6) br.cond.spnt.few 1b - br.ret.sptk.many rp -END(atomic_decr_int) - #define DT_NULL 0 /* Terminating entry. */ #define DT_RELA 7 /* Address of ElfNN_Rela relocations. */ #define DT_RELASZ 8 /* Total size of ElfNN_Rela relocations. */ #define DT_RELAENT 9 /* Size of each ElfNN_Rela relocation entry. */ - + #define R_IA64_NONE 0 /* None */ #define R_IA64_DIR64LSB 0x27 /* word64 LSB S + A */ #define R_IA64_REL64LSB 0x6f /* word64 LSB BD + A */ - + /* * _rtld_reloc: relocate the rtld image, apart from @fptrs. * @@ -233,7 +179,7 @@ END(atomic_decr_int) STATIC_ENTRY(_rtld_reloc, 1) alloc loc0=ar.pfs,1,2,0,0 mov loc1=rp - ;; + ;; movl r15=@gprel(_DYNAMIC) // find _DYNAMIC etc. ;; add r15=r15,gp // relocate _DYNAMIC etc. @@ -244,24 +190,24 @@ STATIC_ENTRY(_rtld_reloc, 1) ;; cmp.eq p6,p0=DT_NULL,r16 // done? 
(p6) br.cond.dpnt.few 2f - ;; + ;; cmp.eq p6,p0=DT_RELA,r16 - ;; + ;; (p6) add r18=r17,in0 // found rela section - ;; + ;; cmp.eq p6,p0=DT_RELASZ,r16 - ;; + ;; (p6) mov r19=r17 // found rela size - ;; + ;; cmp.eq p6,p0=DT_RELAENT,r16 - ;; + ;; (p6) mov r22=r17 // found rela entry size ;; br.sptk.few 1b - -2: + +2: ld8 r15=[r18],8 // read r_offset - ;; + ;; ld8 r16=[r18],8 // read r_info add r15=r15,in0 // relocate r_offset ;; @@ -284,7 +230,7 @@ STATIC_ENTRY(_rtld_reloc, 1) 3: cmp.ltu p6,p0=0,r19 // more? (p6) br.cond.dptk.few 2b // loop - + mov r8=0 // success return value ;; br.cond.sptk.few 9f // done diff --git a/libexec/rtld-elf/libmap.c b/libexec/rtld-elf/libmap.c index 0588e1b53d9a..13fa888147b4 100644 --- a/libexec/rtld-elf/libmap.c +++ b/libexec/rtld-elf/libmap.c @@ -91,7 +91,7 @@ lm_init (void) /* * There should be nothing except whitespace or comment - * from this point to the end of the line. + from this point to the end of the line. */ while(isspace(*cp++)); if (!iseol(*cp)) continue; @@ -114,7 +114,7 @@ lm_init (void) /* Parse 'to' mapping */ t = cp++; while (!isspace(*cp) && !iseol(*cp)) cp++; - + /* Skip and zero out the trailing whitespace */ while (isspace(*cp)) *cp++ = '\0'; @@ -167,10 +167,6 @@ lm_add (char *p, char *f, char *t) if (p == NULL) p = "$DEFAULT$"; -#if 0 - printf("%s(\"%s\", \"%s\", \"%s\")\n", __func__, p, f, t); -#endif - if ((lml = lmp_find(p)) == NULL) lml = lmp_init(xstrdup(p)); @@ -240,3 +236,4 @@ lmp_init (char *n) return (&lmp->lml); } + diff --git a/libexec/rtld-elf/powerpc/reloc.c b/libexec/rtld-elf/powerpc/reloc.c index 038460002cbd..9894033e459a 100644 --- a/libexec/rtld-elf/powerpc/reloc.c +++ b/libexec/rtld-elf/powerpc/reloc.c @@ -55,12 +55,12 @@ do_copy_relocations(Obj_Entry *dstobj) const Elf_Rela *relalim; const Elf_Rela *rela; - /* + /* * COPY relocs are invalid outside of the main program */ - assert(dstobj->mainprog); + assert(dstobj->mainprog); - relalim = (const Elf_Rela *) ((caddr_t) dstobj->rela + + relalim = (const Elf_Rela *) ((caddr_t) dstobj->rela + dstobj->relasize); for (rela = dstobj->rela; rela < relalim; rela++) { void *dstaddr; @@ -81,8 +81,8 @@ do_copy_relocations(Obj_Entry *dstobj) name = dstobj->strtab + dstsym->st_name; hash = elf_hash(name); size = dstsym->st_size; - - for (srcobj = dstobj->next; srcobj != NULL; + + for (srcobj = dstobj->next; srcobj != NULL; srcobj = srcobj->next) { if ((srcsym = symlook_obj(name, hash, srcobj, false)) != NULL) { @@ -96,12 +96,12 @@ do_copy_relocations(Obj_Entry *dstobj) " relocation in %s", name, dstobj->path); return (-1); } - + srcaddr = (const void *) (srcobj->relocbase+srcsym->st_value); - memcpy(dstaddr, srcaddr, size); + memcpy(dstaddr, srcaddr, size); dbg("copy_reloc: src=%p,dst=%p,size=%d\n",srcaddr,dstaddr,size); } - + return (0); } @@ -131,7 +131,7 @@ reloc_non_plt_self(Elf_Dyn *dynp, Elf_Addr relocbase) } /* - * Relocate these values + * Relocate these values */ relalim = (const Elf_Rela *)((caddr_t)rela + relasz); for (; rela < relalim; rela++) { @@ -142,7 +142,7 @@ reloc_non_plt_self(Elf_Dyn *dynp, Elf_Addr relocbase) /* - * Relocate a non-PLT object with addend. + * Relocate a non-PLT object with addend. 
*/ static int reloc_nonplt_object(Obj_Entry *obj_rtld, Obj_Entry *obj, const Elf_Rela *rela, @@ -154,7 +154,7 @@ reloc_nonplt_object(Obj_Entry *obj_rtld, Obj_Entry *obj, const Elf_Rela *rela, Elf_Addr tmp; switch (ELF_R_TYPE(rela->r_info)) { - + case R_PPC_NONE: break; @@ -177,7 +177,7 @@ reloc_nonplt_object(Obj_Entry *obj_rtld, Obj_Entry *obj, const Elf_Rela *rela, case R_PPC_RELATIVE: /* word32 B + A */ tmp = (Elf_Addr)(obj->relocbase + rela->r_addend); - + /* As above, don't issue write unnecessarily */ if (*where != tmp) { *where = tmp; @@ -197,7 +197,7 @@ reloc_nonplt_object(Obj_Entry *obj_rtld, Obj_Entry *obj, const Elf_Rela *rela, " relocation in shared library", obj->path); return (-1); - } + } break; case R_PPC_JMP_SLOT: @@ -209,10 +209,10 @@ reloc_nonplt_object(Obj_Entry *obj_rtld, Obj_Entry *obj, const Elf_Rela *rela, default: _rtld_error("%s: Unsupported relocation type %d" " in non-PLT relocations\n", obj->path, - ELF_R_TYPE(rela->r_info)); + ELF_R_TYPE(rela->r_info)); return (-1); } - return (0); + return (0); } @@ -238,7 +238,7 @@ reloc_non_plt(Obj_Entry *obj, Obj_Entry *obj_rtld) /* * From the SVR4 PPC ABI: - * "The PowerPC family uses only the Elf32_Rela relocation + * "The PowerPC family uses only the Elf32_Rela relocation * entries with explicit addends." */ relalim = (const Elf_Rela *)((caddr_t)obj->rela + obj->relasize); @@ -276,7 +276,7 @@ reloc_plt_object(Obj_Entry *obj, const Elf_Rela *rela) distance = (Elf_Addr)pltresolve - (Elf_Addr)(where + 1); - dbg(" reloc_plt_object: where=%p,pltres=%p,reloff=%x,distance=%x", + dbg(" reloc_plt_object: where=%p,pltres=%p,reloff=%x,distance=%x", (void *)where, (void *)pltresolve, reloff, distance); /* li r11,reloff */ @@ -304,7 +304,7 @@ reloc_plt(Obj_Entry *obj) if (obj->pltrelasize != 0) { - relalim = (const Elf_Rela *)((char *)obj->pltrela + + relalim = (const Elf_Rela *)((char *)obj->pltrela + obj->pltrelasize); for (rela = obj->pltrela; rela < relalim; rela++) { assert(ELF_R_TYPE(rela->r_info) == R_PPC_JMP_SLOT); @@ -352,7 +352,7 @@ reloc_jmpslots(Obj_Entry *obj) (void *)target, basename(defobj->path)); #endif - reloc_jmpslot(where, target, defobj, obj, + reloc_jmpslot(where, target, defobj, obj, (const Elf_Rel *) rela); } @@ -374,7 +374,7 @@ reloc_jmpslot(Elf_Addr *wherep, Elf_Addr target, const Obj_Entry *defobj, Elf_Addr offset; const Elf_Rela *rela = (const Elf_Rela *) rel; - dbg(" reloc_jmpslot: where=%p, target=%p", + dbg(" reloc_jmpslot: where=%p, target=%p", (void *)wherep, (void *)target); /* @@ -444,12 +444,12 @@ init_pltgot(Obj_Entry *obj) * 'The first 18 words (72 bytes) of the PLT are reserved for * use by the dynamic linker. * ... - * 'If the executable or shared object requires N procedure - * linkage table entries, the link editor shall reserve 3*N - * words (12*N bytes) following the 18 reserved words. The - * first 2*N of these words are the procedure linkage table - * entries themselves. The static linker directs calls to bytes - * (72 + (i-1)*8), for i between 1 and N inclusive. The remaining + * 'If the executable or shared object requires N procedure + * linkage table entries, the link editor shall reserve 3*N + * words (12*N bytes) following the 18 reserved words. The + * first 2*N of these words are the procedure linkage table + * entries themselves. The static linker directs calls to bytes + * (72 + (i-1)*8), for i between 1 and N inclusive. The remaining * N words (4*N bytes) are reserved for use by the dynamic linker.' 
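 *
 * As a worked example of that arithmetic (illustrative numbers only):
 * with N = 2 PLT entries the link editor reserves 3*2 = 6 words after
 * the 18 reserved words, of which the first 2*2 = 4 words are the PLT
 * entries themselves; the static linker directs a call to entry i = 1
 * to byte offset 72 + (1-1)*8 = 72 and a call to entry i = 2 to byte
 * offset 72 + (2-1)*8 = 80.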
*/ diff --git a/libexec/rtld-elf/powerpc/rtld_machdep.h b/libexec/rtld-elf/powerpc/rtld_machdep.h index 7e5e7197c6be..77495d76b41d 100644 --- a/libexec/rtld-elf/powerpc/rtld_machdep.h +++ b/libexec/rtld-elf/powerpc/rtld_machdep.h @@ -29,11 +29,9 @@ #ifndef RTLD_MACHDEP_H #define RTLD_MACHDEP_H 1 +#include #include -#define atomic_incr_int(p) atomic_add_int((p), 1) -#define atomic_decr_int(p) atomic_subtract_int((p), 1) - #define CACHE_LINE_SIZE 32 struct Struct_Obj_Entry; @@ -53,7 +51,7 @@ Elf_Addr reloc_jmpslot(Elf_Addr *where, Elf_Addr target, (((InitFunc)(target))()) /* - * Lazy binding entry point, called via PLT. + * Lazy binding entry point, called via PLT. */ void _rtld_bind_start(void); diff --git a/libexec/rtld-elf/powerpc/rtld_start.S b/libexec/rtld-elf/powerpc/rtld_start.S index 040b66349099..86f76e6d282a 100644 --- a/libexec/rtld-elf/powerpc/rtld_start.S +++ b/libexec/rtld-elf/powerpc/rtld_start.S @@ -33,10 +33,10 @@ .extern _GLOBAL_OFFSET_TABLE_ .extern _DYNAMIC - + _ENTRY(.rtld_start) - stwu %r1,-48(%r1) /* 16-byte aligned stack for reg saves + - exit_proc & obj _rtld args + + stwu %r1,-48(%r1) /* 16-byte aligned stack for reg saves + + exit_proc & obj _rtld args + backchain & lrsave stack frame */ stw %r3,16(%r1) /* argc */ stw %r4,20(%r1) /* argv */ @@ -71,7 +71,7 @@ _ENTRY(.rtld_start) sync icbi %r0,%r4 isync - + lwz %r4,0(%r3) /* offset to _DYNAMIC */ add %r3,%r4,%r3 /* r3 = &_DYNAMIC, absolute value */ @@ -79,7 +79,7 @@ _ENTRY(.rtld_start) mflr %r4 /* &_GLOBAL_OFFSET_TABLE_, absolute value */ lwz %r4,0(%r4) /* linker &_DYNAMIC, from got[0] */ subf %r4,%r4,%r3 /* subtract to calculate relocbase */ - + bl reloc_non_plt_self@plt /* reloc_non_plt_self(&_DYNAMIC,base) */ /* @@ -190,5 +190,5 @@ _ENTRY(_rtld_powerpc_pltcall) lwz %r11,0(%r11) # lwz 11,jmptab@l(11) mtctr %r11 bctr # (*jmptab[index])() - + diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c index adb156aaeaf5..01a1a338814b 100644 --- a/libexec/rtld-elf/rtld.c +++ b/libexec/rtld-elf/rtld.c @@ -100,7 +100,6 @@ static void linkmap_delete(Obj_Entry *); static int load_needed_objects(Obj_Entry *); static int load_preload_objects(void); static Obj_Entry *load_object(char *); -static void lock_check(void); static Obj_Entry *obj_from_addr(const void *); static void objlist_call_fini(Objlist *); static void objlist_call_init(Objlist *); @@ -155,8 +154,6 @@ static Objlist list_main = /* Objects loaded at program startup */ static Objlist list_fini = /* Objects needing fini() calls */ STAILQ_HEAD_INITIALIZER(list_fini); -static LockInfo lockinfo; - static Elf_Sym sym_zero; /* For resolving undefined weak refs. 
*/ #define GDB_STATE(s,m) r_debug.r_state = s; r_debug_state(&r_debug,m); @@ -178,6 +175,7 @@ static func_ptr_type exports[] = { (func_ptr_type) &dladdr, (func_ptr_type) &dllockinit, (func_ptr_type) &dlinfo, + (func_ptr_type) &_rtld_thread_init, NULL }; @@ -199,36 +197,6 @@ char **environ; (dlp)->num_alloc = obj_count, \ (dlp)->num_used = 0) -static __inline void -rlock_acquire(void) -{ - lockinfo.rlock_acquire(lockinfo.thelock); - atomic_incr_int(&lockinfo.rcount); - lock_check(); -} - -static __inline void -wlock_acquire(void) -{ - lockinfo.wlock_acquire(lockinfo.thelock); - atomic_incr_int(&lockinfo.wcount); - lock_check(); -} - -static __inline void -rlock_release(void) -{ - atomic_decr_int(&lockinfo.rcount); - lockinfo.rlock_release(lockinfo.thelock); -} - -static __inline void -wlock_release(void) -{ - atomic_decr_int(&lockinfo.wcount); - lockinfo.wlock_release(lockinfo.thelock); -} - /* * Main entry point for dynamic linking. The first argument is the * stack pointer. The stack is expected to be laid out as described @@ -259,6 +227,7 @@ _rtld(Elf_Addr *sp, func_ptr_type *exit_proc, Obj_Entry **objp) Obj_Entry *obj; Obj_Entry **preload_tail; Objlist initlist; + int lockstate; /* * On entry, the dynamic linker itself has not been relocated yet. @@ -406,8 +375,7 @@ _rtld(Elf_Addr *sp, func_ptr_type *exit_proc, Obj_Entry **objp) set_program_var("environ", env); dbg("initializing thread locks"); - lockdflt_init(&lockinfo); - lockinfo.thelock = lockinfo.lock_create(lockinfo.context); + lockdflt_init(); /* Make a list of init functions to call. */ objlist_init(&initlist); @@ -416,9 +384,9 @@ _rtld(Elf_Addr *sp, func_ptr_type *exit_proc, Obj_Entry **objp) r_debug_state(NULL, &obj_main->linkmap); /* say hello to gdb! */ objlist_call_init(&initlist); - wlock_acquire(); + lockstate = wlock_acquire(rtld_bind_lock); objlist_clear(&initlist); - wlock_release(); + wlock_release(rtld_bind_lock, lockstate); dbg("transferring control to program entry point = %p", obj_main->entry); @@ -436,8 +404,9 @@ _rtld_bind(Obj_Entry *obj, Elf_Word reloff) const Obj_Entry *defobj; Elf_Addr *where; Elf_Addr target; + int lockstate; - rlock_acquire(); + lockstate = rlock_acquire(rtld_bind_lock); if (obj->pltrel) rel = (const Elf_Rel *) ((caddr_t) obj->pltrel + reloff); else @@ -462,7 +431,7 @@ _rtld_bind(Obj_Entry *obj, Elf_Word reloff) * that the trampoline needs. */ target = reloc_jmpslot(where, target, defobj, obj, rel); - rlock_release(); + rlock_release(rtld_bind_lock, lockstate); return target; } @@ -1092,7 +1061,7 @@ is_exported(const Elf_Sym *def) const func_ptr_type *p; value = (Elf_Addr)(obj_rtld.relocbase + def->st_value); - for (p = exports; *p != NULL; p++) + for (p = exports; *p != NULL; p++) if (FPTR_TARGET(*p) == value) return true; return false; @@ -1230,26 +1199,6 @@ load_object(char *path) return obj; } -/* - * Check for locking violations and die if one is found. - */ -static void -lock_check(void) -{ - int rcount, wcount; - - rcount = lockinfo.rcount; - wcount = lockinfo.wcount; - assert(rcount >= 0); - assert(wcount >= 0); - if (wcount > 1 || (wcount != 0 && rcount != 0)) { - _rtld_error("Application locking error: %d readers and %d writers" - " in dynamic linker. 
See DLLOCKINIT(3) in manual pages.", - rcount, wcount); - die(); - } -} - static Obj_Entry * obj_from_addr(const void *addr) { @@ -1572,11 +1521,12 @@ int dlclose(void *handle) { Obj_Entry *root; + int lockstate; - wlock_acquire(); + lockstate = wlock_acquire(rtld_bind_lock); root = dlcheck(handle); if (root == NULL) { - wlock_release(); + wlock_release(rtld_bind_lock, lockstate); return -1; } @@ -1590,9 +1540,9 @@ dlclose(void *handle) * The object is no longer referenced, so we must unload it. * First, call the fini functions with no locks held. */ - wlock_release(); + wlock_release(rtld_bind_lock, lockstate); objlist_call_fini(&list_fini); - wlock_acquire(); + lockstate = wlock_acquire(rtld_bind_lock); objlist_remove_unref(&list_fini); /* Finish cleaning up the newly-unreferenced objects. */ @@ -1600,7 +1550,7 @@ dlclose(void *handle) unload_object(root); GDB_STATE(RT_CONSISTENT,NULL); } - wlock_release(); + wlock_release(rtld_bind_lock, lockstate); return 0; } @@ -1640,7 +1590,7 @@ dlopen(const char *name, int mode) Obj_Entry **old_obj_tail; Obj_Entry *obj; Objlist initlist; - int result; + int result, lockstate; ld_tracing = (mode & RTLD_TRACE) == 0 ? NULL : "1"; if (ld_tracing != NULL) @@ -1648,7 +1598,7 @@ dlopen(const char *name, int mode) objlist_init(&initlist); - wlock_acquire(); + lockstate = wlock_acquire(rtld_bind_lock); GDB_STATE(RT_ADD,NULL); old_obj_tail = obj_tail; @@ -1699,15 +1649,15 @@ dlopen(const char *name, int mode) GDB_STATE(RT_CONSISTENT,obj ? &obj->linkmap : NULL); /* Call the init functions with no locks held. */ - wlock_release(); + wlock_release(rtld_bind_lock, lockstate); objlist_call_init(&initlist); - wlock_acquire(); + lockstate = wlock_acquire(rtld_bind_lock); objlist_clear(&initlist); - wlock_release(); + wlock_release(rtld_bind_lock, lockstate); return obj; trace: trace_loaded_objects(obj); - wlock_release(); + wlock_release(rtld_bind_lock, lockstate); exit(0); } @@ -1718,12 +1668,13 @@ dlsym(void *handle, const char *name) unsigned long hash; const Elf_Sym *def; const Obj_Entry *defobj; + int lockstate; hash = elf_hash(name); def = NULL; defobj = NULL; - rlock_acquire(); + lockstate = rlock_acquire(rtld_bind_lock); if (handle == NULL || handle == RTLD_NEXT || handle == RTLD_DEFAULT || handle == RTLD_SELF) { void *retaddr; @@ -1731,7 +1682,7 @@ dlsym(void *handle, const char *name) retaddr = __builtin_return_address(0); /* __GNUC__ only */ if ((obj = obj_from_addr(retaddr)) == NULL) { _rtld_error("Cannot determine caller's shared object"); - rlock_release(); + rlock_release(rtld_bind_lock, lockstate); return NULL; } if (handle == NULL) { /* Just the caller's shared object. 
*/ @@ -1753,7 +1704,7 @@ dlsym(void *handle, const char *name) } } else { if ((obj = dlcheck(handle)) == NULL) { - rlock_release(); + rlock_release(rtld_bind_lock, lockstate); return NULL; } @@ -1775,7 +1726,7 @@ dlsym(void *handle, const char *name) } if (def != NULL) { - rlock_release(); + rlock_release(rtld_bind_lock, lockstate); /* * The value required by the caller is derived from the value @@ -1792,7 +1743,7 @@ dlsym(void *handle, const char *name) } _rtld_error("Undefined symbol \"%s\"", name); - rlock_release(); + rlock_release(rtld_bind_lock, lockstate); return NULL; } @@ -1803,12 +1754,13 @@ dladdr(const void *addr, Dl_info *info) const Elf_Sym *def; void *symbol_addr; unsigned long symoffset; - - rlock_acquire(); + int lockstate; + + lockstate = rlock_acquire(rtld_bind_lock); obj = obj_from_addr(addr); if (obj == NULL) { _rtld_error("No shared object contains address"); - rlock_release(); + rlock_release(rtld_bind_lock, lockstate); return 0; } info->dli_fname = obj->path; @@ -1847,7 +1799,7 @@ dladdr(const void *addr, Dl_info *info) if (info->dli_saddr == addr) break; } - rlock_release(); + rlock_release(rtld_bind_lock, lockstate); return 1; } @@ -1855,9 +1807,9 @@ int dlinfo(void *handle, int request, void *p) { const Obj_Entry *obj; - int error; + int error, lockstate; - rlock_acquire(); + lockstate = rlock_acquire(rtld_bind_lock); if (handle == NULL || handle == RTLD_SELF) { void *retaddr; @@ -1869,7 +1821,7 @@ dlinfo(void *handle, int request, void *p) obj = dlcheck(handle); if (obj == NULL) { - rlock_release(); + rlock_release(rtld_bind_lock, lockstate); return (-1); } @@ -1892,7 +1844,7 @@ dlinfo(void *handle, int request, void *p) error = -1; } - rlock_release(); + rlock_release(rtld_bind_lock, lockstate); return (error); } @@ -2393,7 +2345,7 @@ unload_object(Obj_Entry *root) /* * Pass over the DAG removing unreferenced objects from * appropriate lists. - */ + */ unlink_object(root); /* Unmap all objects that are no longer referenced. */ @@ -2447,3 +2399,5 @@ unref_dag(Obj_Entry *root) STAILQ_FOREACH(elm, &root->dagmembers , link) elm->obj->refcount--; } + + diff --git a/libexec/rtld-elf/rtld.h b/libexec/rtld-elf/rtld.h index 4da1a35cfdad..21a03c84cf5e 100644 --- a/libexec/rtld-elf/rtld.h +++ b/libexec/rtld-elf/rtld.h @@ -36,6 +36,7 @@ #include #include +#include "rtld_lock.h" #include "rtld_machdep.h" #ifndef STANDARD_LIBRARY_PATH @@ -191,7 +192,7 @@ unsigned long elf_hash(const char *); const Elf_Sym *find_symdef(unsigned long, const Obj_Entry *, const Obj_Entry **, bool, SymCache *); void init_pltgot(Obj_Entry *); -void lockdflt_init(LockInfo *); +void lockdflt_init(); void obj_free(Obj_Entry *); Obj_Entry *obj_new(void); int reloc_non_plt(Obj_Entry *, Obj_Entry *); diff --git a/libexec/rtld-elf/rtld_lock.c b/libexec/rtld-elf/rtld_lock.c new file mode 100644 index 000000000000..bdee69041b2e --- /dev/null +++ b/libexec/rtld-elf/rtld_lock.c @@ -0,0 +1,336 @@ +/*- + * Copyright 1999, 2000 John D. Polstra. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: FreeBSD: src/libexec/rtld-elf/sparc64/lockdflt.c,v 1.3 2002/10/09 + * $FreeBSD$ + */ + +/* + * Thread locking implementation for the dynamic linker. + * + * We use the "simple, non-scalable reader-preference lock" from: + * + * J. M. Mellor-Crummey and M. L. Scott. "Scalable Reader-Writer + * Synchronization for Shared-Memory Multiprocessors." 3rd ACM Symp. on + * Principles and Practice of Parallel Programming, April 1991. + * + * In this algorithm the lock is a single word. Its low-order bit is + * set when a writer holds the lock. The remaining high-order bits + * contain a count of readers desiring the lock. The algorithm requires + * atomic "compare_and_store" and "add" operations, which we implement + * using assembly language sequences in "rtld_start.S". + */ + +#include +#include +#include + +#include "debug.h" +#include "rtld.h" +#include "rtld_machdep.h" + +#define WAFLAG 0x1 /* A writer holds the lock */ +#define RC_INCR 0x2 /* Adjusts count of readers desiring lock */ + +typedef struct Struct_Lock { + volatile int lock; + void *base; +} Lock; + +static sigset_t fullsigmask, oldsigmask; +static int thread_flag; + +static void * +def_lock_create() +{ + void *base; + char *p; + uintptr_t r; + Lock *l; + + /* + * Arrange for the lock to occupy its own cache line. First, we + * optimistically allocate just a cache line, hoping that malloc + * will give us a well-aligned block of memory. If that doesn't + * work, we allocate a larger block and take a well-aligned cache + * line from it. + */ + base = xmalloc(CACHE_LINE_SIZE); + p = (char *)base; + if ((uintptr_t)p % CACHE_LINE_SIZE != 0) { + free(base); + base = xmalloc(2 * CACHE_LINE_SIZE); + p = (char *)base; + if ((r = (uintptr_t)p % CACHE_LINE_SIZE) != 0) + p += CACHE_LINE_SIZE - r; + } + l = (Lock *)p; + l->base = base; + l->lock = 0; + return l; +} + +static void +def_lock_destroy(void *lock) +{ + Lock *l = (Lock *)lock; + + free(l->base); +} + +static void +def_rlock_acquire(void *lock) +{ + Lock *l = (Lock *)lock; + + atomic_add_acq_int(&l->lock, RC_INCR); + while (l->lock & WAFLAG) + ; /* Spin */ +} + +static void +def_wlock_acquire(void *lock) +{ + Lock *l = (Lock *)lock; + sigset_t tmp_oldsigmask; + + for ( ; ; ) { + sigprocmask(SIG_BLOCK, &fullsigmask, &tmp_oldsigmask); + if (atomic_cmpset_acq_int(&l->lock, 0, WAFLAG)) + break; + sigprocmask(SIG_SETMASK, &tmp_oldsigmask, NULL); + } + oldsigmask = tmp_oldsigmask; +} + +static void +def_lock_release(void *lock) +{ + Lock *l = (Lock *)lock; + + if ((l->lock & WAFLAG) == 0) + atomic_add_rel_int(&l->lock, -RC_INCR); + else { + atomic_add_rel_int(&l->lock, -WAFLAG); + sigprocmask(SIG_SETMASK, &oldsigmask, NULL); + } +} + +#if __i386__ +/* + * Import a crude exclusive lock implementation for i386 processors. 
+ * This file will be removed once i386 support is deprecated in favor + * of i486+. + */ +#include "i386/lockdflt.c" + +#endif + +static int +def_thread_set_flag(int mask) +{ + int old_val = thread_flag; + thread_flag |= mask; + return (old_val); +} + +static int +def_thread_clr_flag(int mask) +{ + int old_val = thread_flag; + thread_flag &= ~mask; + return (old_val); +} + +/* + * Public interface exposed to the rest of the dynamic linker. + */ +static struct RtldLockInfo lockinfo; +static struct RtldLockInfo deflockinfo; + +static __inline__ int +thread_mask_set(int mask) +{ + return lockinfo.thread_set_flag(mask); +} + +static __inline__ void +thread_mask_clear(int mask) +{ + lockinfo.thread_clr_flag(mask); +} + +#define RTLD_LOCK_CNT 2 +struct rtld_lock { + void *handle; + int mask; +} rtld_locks[RTLD_LOCK_CNT]; + +rtld_lock_t rtld_bind_lock = &rtld_locks[0]; +rtld_lock_t rtld_libc_lock = &rtld_locks[1]; + +int +rlock_acquire(rtld_lock_t lock) +{ + if (thread_mask_set(lock->mask)) { + dbg("rlock_acquire: recursed"); + return (0); + } + lockinfo.rlock_acquire(lock->handle); + return (1); +} + +int +wlock_acquire(rtld_lock_t lock) +{ + if (thread_mask_set(lock->mask)) { + dbg("wlock_acquire: recursed"); + return (0); + } + lockinfo.wlock_acquire(lock->handle); + return (1); +} + +void +rlock_release(rtld_lock_t lock, int locked) +{ + if (locked == 0) + return; + thread_mask_clear(lock->mask); + lockinfo.lock_release(lock->handle); +} + +void +wlock_release(rtld_lock_t lock, int locked) +{ + if (locked == 0) + return; + thread_mask_clear(lock->mask); + lockinfo.lock_release(lock->handle); +} + +void +lockdflt_init() +{ + int i; + + deflockinfo.rtli_version = RTLI_VERSION; + deflockinfo.lock_create = def_lock_create; + deflockinfo.lock_destroy = def_lock_destroy; + deflockinfo.rlock_acquire = def_rlock_acquire; + deflockinfo.wlock_acquire = def_wlock_acquire; + deflockinfo.lock_release = def_lock_release; + deflockinfo.thread_set_flag = def_thread_set_flag; + deflockinfo.thread_clr_flag = def_thread_clr_flag; + deflockinfo.at_fork = NULL; + + for (i = 0; i < RTLD_LOCK_CNT; i++) { + rtld_locks[i].mask = (1 << i); + rtld_locks[i].handle = NULL; + } + +#if __i386__ + if (!cpu_supports_cmpxchg()) { + /* It's a cruddy old 80386. */ + deflockinfo.rlock_acquire = lock80386_acquire; + deflockinfo.wlock_acquire = lock80386_acquire; + deflockinfo.lock_release = lock80386_release; + } +#endif + + memcpy(&lockinfo, &deflockinfo, sizeof(lockinfo)); + _rtld_thread_init(NULL); + /* + * Construct a mask to block all signals except traps which might + * conceivably be generated within the dynamic linker itself. + */ + sigfillset(&fullsigmask); + sigdelset(&fullsigmask, SIGILL); + sigdelset(&fullsigmask, SIGTRAP); + sigdelset(&fullsigmask, SIGABRT); + sigdelset(&fullsigmask, SIGEMT); + sigdelset(&fullsigmask, SIGFPE); + sigdelset(&fullsigmask, SIGBUS); + sigdelset(&fullsigmask, SIGSEGV); + sigdelset(&fullsigmask, SIGSYS); +} + +/* + * Callback function to allow threads implementation to + * register their own locking primitives if the default + * one is not suitable. + * The current context should be the only context + * executing at the invocation time. 
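+ *
+ * A minimal registration sketch, with hypothetical callback names
+ * shown purely for illustration (a real threads library supplies
+ * its own implementations, in the field order of struct RtldLockInfo):
+ *
+ *	static struct RtldLockInfo li = {
+ *		RTLI_VERSION,
+ *		thr_lock_create, thr_lock_destroy,
+ *		thr_rlock_acquire, thr_wlock_acquire, thr_lock_release,
+ *		thr_set_flag, thr_clr_flag,
+ *		NULL			(no at_fork hook)
+ *	};
+ *	_rtld_thread_init(&li);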
+ */ +void +_rtld_thread_init(struct RtldLockInfo *pli) +{ + int flags, i; + void *locks[RTLD_LOCK_CNT]; + + /* disable all locking while this function is running */ + flags = thread_mask_set(~0); + + if (pli == NULL) + pli = &deflockinfo; + + + for (i = 0; i < RTLD_LOCK_CNT; i++) + if ((locks[i] = pli->lock_create()) == NULL) + break; + + if (i < RTLD_LOCK_CNT) { + while (--i >= 0) + pli->lock_destroy(locks[i]); + abort(); + } + + for (i = 0; i < RTLD_LOCK_CNT; i++) { + if (rtld_locks[i].handle == NULL) + continue; + if (flags & rtld_locks[i].mask) + lockinfo.lock_release(rtld_locks[i].handle); + lockinfo.lock_destroy(rtld_locks[i].handle); + } + + for (i = 0; i < RTLD_LOCK_CNT; i++) { + rtld_locks[i].handle = locks[i]; + if (flags & rtld_locks[i].mask) + pli->wlock_acquire(rtld_locks[i].handle); + } + + lockinfo.lock_create = pli->lock_create; + lockinfo.lock_destroy = pli->lock_destroy; + lockinfo.rlock_acquire = pli->rlock_acquire; + lockinfo.wlock_acquire = pli->wlock_acquire; + lockinfo.lock_release = pli->lock_release; + lockinfo.thread_set_flag = pli->thread_set_flag; + lockinfo.thread_clr_flag = pli->thread_clr_flag; + lockinfo.at_fork = pli->at_fork; + + /* restore thread locking state, this time with new locks */ + thread_mask_clear(~0); + thread_mask_set(flags); + dbg("_rtld_thread_init: done"); +} diff --git a/libexec/rtld-elf/rtld_lock.h b/libexec/rtld-elf/rtld_lock.h new file mode 100644 index 000000000000..941b19e83bf7 --- /dev/null +++ b/libexec/rtld-elf/rtld_lock.h @@ -0,0 +1,63 @@ +/*- + * Copyright 2003 Alexander Kabaev. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _RTLD_LOCK_H_ +#define _RTLD_LOCK_H_ + +#define RTLI_VERSION 0x01 + +struct RtldLockInfo +{ + unsigned int rtli_version; + void *(*lock_create)(); + void (*lock_destroy)(void *); + void (*rlock_acquire)(void *); + void (*wlock_acquire)(void *); + void (*lock_release)(void *); + int (*thread_set_flag)(int); + int (*thread_clr_flag)(int); + void (*at_fork)(); +}; + +extern void _rtld_thread_init(struct RtldLockInfo *); + +#ifdef IN_RTLD + +struct rtld_lock; +typedef struct rtld_lock *rtld_lock_t; + +extern rtld_lock_t rtld_bind_lock; +extern rtld_lock_t rtld_libc_lock; + +int rlock_acquire(rtld_lock_t); +int wlock_acquire(rtld_lock_t); +void rlock_release(rtld_lock_t, int); +void wlock_release(rtld_lock_t, int); + +#endif /* IN_RTLD */ + +#endif diff --git a/libexec/rtld-elf/sparc64/reloc.c b/libexec/rtld-elf/sparc64/reloc.c index 5808fd3a1eeb..c083c9c6a1ee 100644 --- a/libexec/rtld-elf/sparc64/reloc.c +++ b/libexec/rtld-elf/sparc64/reloc.c @@ -142,9 +142,9 @@ static const char *reloc_names[] = { "GLOB_DAT", "JMP_SLOT", "RELATIVE", "UA_32", "PLT32", "HIPLT22", "LOPLT10", "LOPLT10", "PCPLT22", "PCPLT32", "10", "11", "64", "OLO10", "HH22", - "HM10", "LM22", "PC_HH22", "PC_HM10", "PC_LM22", + "HM10", "LM22", "PC_HH22", "PC_HM10", "PC_LM22", "WDISP16", "WDISP19", "GLOB_JMP", "7", "5", "6", - "DISP64", "PLT64", "HIX22", "LOX10", "H44", "M44", + "DISP64", "PLT64", "HIX22", "LOX10", "H44", "M44", "L44", "REGISTER", "UA64", "UA16" }; #endif @@ -166,7 +166,7 @@ static long reloc_target_bitmask[] = { _BM(22), _BM(22), /* HI22, _22 */ _BM(13), _BM(10), /* RELOC_13, _LO10 */ _BM(10), _BM(13), _BM(22), /* GOT10, GOT13, GOT22 */ - _BM(10), _BM(22), /* _PC10, _PC22 */ + _BM(10), _BM(22), /* _PC10, _PC22 */ _BM(30), 0, /* _WPLT30, _COPY */ _BM(32), _BM(32), _BM(32), /* _GLOB_DAT, JMP_SLOT, _RELATIVE */ _BM(32), _BM(32), /* _UA32, PLT32 */ @@ -284,7 +284,7 @@ reloc_nonplt_object(Obj_Entry *obj, const Elf_Rela *rela, SymCache *cache) Elf_Word type; Elf_Addr value; Elf_Addr mask; - + where = (Elf_Addr *)(obj->relocbase + rela->r_offset); where32 = (Elf_Half *)where; defobj = NULL; @@ -364,11 +364,11 @@ reloc_nonplt_object(Obj_Entry *obj, const Elf_Rela *rela, SymCache *cache) char *ptr; int size; int i; - + size = RELOC_TARGET_SIZE(type) / 8; ptr = (char *)where; tmp = 0; - + /* Read it in one byte at a time. */ for (i = 0; i < size; i++) tmp = (tmp << 8) | ptr[i]; @@ -499,7 +499,7 @@ reloc_jmpslot(Elf_Addr *wherep, Elf_Addr target, const Obj_Entry *obj, where = (Elf_Half *)wherep; offset = ((Elf_Addr)where) - target; if (offset <= (1L<<20) && offset >= -(1L<<20)) { - /* + /* * We're within 1MB -- we can use a direct branch insn. * * We can generate this pattern: @@ -517,7 +517,7 @@ reloc_jmpslot(Elf_Addr *wherep, Elf_Addr target, const Obj_Entry *obj, where[1] = BAA | ((offset >> 2) &0x3fffff); flush(where, 4); } else if (target >= 0 && target < (1L<<32)) { - /* + /* * We're withing 32-bits of address zero. * * The resulting code in the jump slot is: @@ -537,7 +537,7 @@ reloc_jmpslot(Elf_Addr *wherep, Elf_Addr target, const Obj_Entry *obj, where[1] = SETHI | HIVAL(target, 10); flush(where, 4); } else if (target <= 0 && target > -(1L<<32)) { - /* + /* * We're withing 32-bits of address -1. 
* * The resulting code in the jump slot is: @@ -559,7 +559,7 @@ reloc_jmpslot(Elf_Addr *wherep, Elf_Addr target, const Obj_Entry *obj, where[1] = SETHI | HIVAL(~target, 10); flush(where, 4); } else if (offset <= (1L<<32) && offset >= -((1L<<32) - 4)) { - /* + /* * We're withing 32-bits -- we can use a direct call * insn * @@ -582,7 +582,7 @@ reloc_jmpslot(Elf_Addr *wherep, Elf_Addr target, const Obj_Entry *obj, where[1] = MOV71; flush(where, 4); } else if (offset >= 0 && offset < (1L<<44)) { - /* + /* * We're withing 44 bits. We can generate this pattern: * * The resulting code in the jump slot is: @@ -590,8 +590,8 @@ reloc_jmpslot(Elf_Addr *wherep, Elf_Addr target, const Obj_Entry *obj, * sethi %hi(. - .PLT0), %g1 * sethi %h44(addr), %g1 * or %g1, %m44(addr), %g1 - * sllx %g1, 12, %g1 - * jmp %g1+%l44(addr) + * sllx %g1, 12, %g1 + * jmp %g1+%l44(addr) * nop * nop * nop @@ -606,7 +606,7 @@ reloc_jmpslot(Elf_Addr *wherep, Elf_Addr target, const Obj_Entry *obj, where[1] = SETHI | HIVAL(offset, 22); flush(where, 4); } else if (offset < 0 && offset > -(1L<<44)) { - /* + /* * We're withing 44 bits. We can generate this pattern: * * The resulting code in the jump slot is: @@ -614,8 +614,8 @@ reloc_jmpslot(Elf_Addr *wherep, Elf_Addr target, const Obj_Entry *obj, * sethi %hi(. - .PLT0), %g1 * sethi %h44(-addr), %g1 * xor %g1, %m44(-addr), %g1 - * sllx %g1, 12, %g1 - * jmp %g1+%l44(addr) + * sllx %g1, 12, %g1 + * jmp %g1+%l44(addr) * nop * nop * nop @@ -630,7 +630,7 @@ reloc_jmpslot(Elf_Addr *wherep, Elf_Addr target, const Obj_Entry *obj, where[1] = SETHI | HIVAL(~offset, 22); flush(where, 4); } else { - /* + /* * We need to load all 64-bits * * The resulting code in the jump slot is: diff --git a/libexec/rtld-elf/sparc64/rtld_machdep.h b/libexec/rtld-elf/sparc64/rtld_machdep.h index 0d028c8a8e6a..9efadcabaad9 100644 --- a/libexec/rtld-elf/sparc64/rtld_machdep.h +++ b/libexec/rtld-elf/sparc64/rtld_machdep.h @@ -32,16 +32,6 @@ #include #include -#define atomic_incr_int(p) atomic_add_int((p), 1) -#define atomic_decr_int(p) atomic_subtract_int((p), 1) - -/* - * This value of CACHE_LINE_SIZE is conservative. The actual size - * is 32 on the 21064, 21064A, 21066, 21066A, and 21164. It is 64 - * on the 21264. Compaq recommends sequestering each lock in its own - * 128-byte block to allow for future implementations with larger - * cache lines. - */ #define CACHE_LINE_SIZE 128 struct Struct_Obj_Entry; diff --git a/libexec/rtld-elf/sparc64/rtld_start.S b/libexec/rtld-elf/sparc64/rtld_start.S index b97e7983fdbd..f9d05b743f14 100644 --- a/libexec/rtld-elf/sparc64/rtld_start.S +++ b/libexec/rtld-elf/sparc64/rtld_start.S @@ -79,7 +79,7 @@ END(.rtld_start) * * The first two entries of PLT2 contain the xword object pointer. * - * These routines are called with two longword arguments, + * These routines are called with two longword arguments, * x and y. To calculate the address of the entry, * _rtld_bind_start_1(x, y) does: * @@ -120,7 +120,7 @@ ENTRY(_rtld_bind_start_0) add %l4, %l5, %l4 /* (i / 5120) * 160 + (i % 5120) / 24 */ add %l4, %l6, %l4 /* + NPLTLOSLOTS */ sub %l4, 4, %l4 /* XXX: 4 entries are reserved */ - + sllx %l4, 1, %l5 /* Each element is an Elf_Rela which */ add %l5, %l4, %l4 /* is 3 longwords or 24 bytes. */ sllx %l4, 3, %l4 /* So multiply by 24. 
*/ @@ -133,17 +133,17 @@ ENTRY(_rtld_bind_start_0) jmp %o0 /* return value == function address */ restore /* Dump our stack frame */ END(_rtld_bind_start_0) - + ENTRY(_rtld_bind_start_1) srax %o0, 15, %o2 /* %o0 is the index to our PLT slot */ sub %o2, 4, %o2 /* XXX: 4 entries are reserved */ - + sllx %o2, 1, %o3 /* Each element is an Elf_Rela which */ add %o3, %o2, %o2 /* is 3 longwords or 24 bytes. */ sllx %o2, 3, %o2 /* So multiply by 24. */ - + ldx [%o1 + 8], %o0 /* The object pointer is at [%o1 + 8] */ - + call _rtld_bind /* Call _rtld_bind(obj, offset) */ mov %o2, %o1
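
A minimal sketch of the call-site pattern this change introduces inside
rtld itself, assuming only the declarations from rtld_lock.h above (the
entry-point name is hypothetical):

	#include "rtld_lock.h"

	void
	example_rtld_entry_point(void)
	{
		int lockstate;

		lockstate = rlock_acquire(rtld_bind_lock);
		/* ... resolve symbols under the read lock ... */
		rlock_release(rtld_bind_lock, lockstate);
	}

The saved lockstate records whether the lock was actually taken:
rlock_acquire() returns 0 when the calling thread already holds an rtld
lock and the acquisition is skipped, and rlock_release() with a zero
lockstate is a no-op, so recursive entry through a user-supplied locking
callback cannot self-deadlock.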