linux(4); Almost complete the vDSO.

The vDSO (virtual dynamic shared object) is a small shared library that the
kernel maps R/O into the address space of all Linux processes on image
activation. The vDSO is a fully formed ELF image, shared by all processes
with the same ABI, and has no process-private data.

The primary purpose of the vDSO:
- non-executable stack, signal trampolines not copied to the stack;
- signal trampolines unwind, mandatory for the NPTL;
- to avoid context-switch overhead, frequently used system calls can be
  implemented in the vDSO: for now gettimeofday, clock_gettime.

The first two have been implemented, so add the implementation of system
calls.

The system call implementation is based on the native timekeeping code,
with some limitations:
- ifunc can't be used, as the vDSO is mapped r/o into the process VA and
  rtld can't relocate symbols;
- reading HPET memory is not implemented for now (TODO).

In case of any error, the vDSO system calls fall back to the kernel system
calls. For unimplemented vDSO system calls, prototypes were added which
call the corresponding kernel system call.

Tested by:		trasz (arm64)
Differential revision:  https://reviews.freebsd.org/D30900
MFC after:              2 weeks
This commit is contained in:
Dmitry Chagin 2021-07-20 10:01:18 +03:00
parent 5fd9cd53d2
commit 9931033bbf
23 changed files with 1966 additions and 339 deletions

View File

@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/stddef.h>
#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
@ -72,6 +73,7 @@ __FBSDID("$FreeBSD$");
#include <machine/specialreg.h>
#include <machine/trap.h>
#include <x86/linux/linux_x86.h>
#include <amd64/linux/linux.h>
#include <amd64/linux/linux_proto.h>
#include <compat/linux/linux_emul.h>
@ -85,11 +87,24 @@ __FBSDID("$FreeBSD$");
MODULE_VERSION(linux64, 1);
/*
 * Layout of the top of the Linux process address space (LA48), from the
 * highest address down: the two-page vDSO, one page reserved for the
 * native shared page, then the user stack with ps_strings at its top.
 *
 * LINUX_VDSOPAGE_SIZE is parenthesised so that it expands safely inside
 * any expression (division, modulo, multiplication, ...).
 */
#define	LINUX_VDSOPAGE_SIZE	(PAGE_SIZE * 2)
#define	LINUX_VDSOPAGE_LA48	(VM_MAXUSER_ADDRESS_LA48 - \
				    LINUX_VDSOPAGE_SIZE)
/* PAGE_SIZE here is the size of the native SHAREDPAGE. */
#define	LINUX_SHAREDPAGE_LA48	(LINUX_VDSOPAGE_LA48 - PAGE_SIZE)
#define	LINUX_USRSTACK_LA48	LINUX_SHAREDPAGE_LA48
#define	LINUX_PS_STRINGS_LA48	(LINUX_USRSTACK_LA48 - \
				    sizeof(struct ps_strings))
static int linux_szsigcode;
static vm_object_t linux_shared_page_obj;
static char *linux_shared_page_mapping;
extern char _binary_linux_locore_o_start;
extern char _binary_linux_locore_o_end;
static vm_object_t linux_vdso_obj;
static char *linux_vdso_mapping;
extern char _binary_linux_vdso_so_o_start;
extern char _binary_linux_vdso_so_o_end;
static vm_offset_t linux_vdso_base;
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
@ -102,10 +117,12 @@ static int linux_fixup_elf(uintptr_t *stack_base,
static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
static void linux_vdso_install(void *param);
static void linux_vdso_deinstall(void *param);
static void linux_vdso_reloc(char *mapping, Elf_Addr offset);
static void linux_set_syscall_retval(struct thread *td, int error);
static int linux_fetch_syscall_args(struct thread *td);
static void linux_exec_setregs(struct thread *td, struct image_params *imgp,
uintptr_t stack);
static void linux_exec_sysvec_init(void *param);
static int linux_on_exec_vmspace(struct proc *p,
struct image_params *imgp);
static int linux_vsyscall(struct thread *td);
@ -151,6 +168,8 @@ static int _bsd_to_linux_trapcode[] = {
LINUX_VDSO_SYM_INTPTR(linux_rt_sigcode);
LINUX_VDSO_SYM_CHAR(linux_platform);
LINUX_VDSO_SYM_INTPTR(kern_timekeep_base);
LINUX_VDSO_SYM_INTPTR(kern_tsc_selector);
/*
* If FreeBSD & Linux have a difference of opinion about what a trap
@ -264,8 +283,7 @@ linux_copyout_auxargs(struct image_params *imgp, uintptr_t base)
M_WAITOK | M_ZERO);
issetugid = p->p_flag & P_SUGID ? 1 : 0;
AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR,
imgp->proc->p_sysent->sv_shared_page_base);
AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, linux_vdso_base);
AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
@ -739,7 +757,7 @@ struct sysentvec elf_linux_sysvec = {
.sv_transtrap = linux_translate_traps,
.sv_fixup = linux_fixup_elf,
.sv_sendsig = linux_rt_sendsig,
.sv_sigcode = &_binary_linux_locore_o_start,
.sv_sigcode = &_binary_linux_vdso_so_o_start,
.sv_szsigcode = &linux_szsigcode,
.sv_name = "Linux ELF64",
.sv_coredump = elf64_coredump,
@ -750,8 +768,8 @@ struct sysentvec elf_linux_sysvec = {
.sv_minsigstksz = LINUX_MINSIGSTKSZ,
.sv_minuser = VM_MIN_ADDRESS,
.sv_maxuser = VM_MAXUSER_ADDRESS_LA48,
.sv_usrstack = USRSTACK_LA48,
.sv_psstrings = PS_STRINGS_LA48,
.sv_usrstack = LINUX_USRSTACK_LA48,
.sv_psstrings = LINUX_PS_STRINGS_LA48,
.sv_stackprot = VM_PROT_ALL,
.sv_copyout_auxargs = linux_copyout_auxargs,
.sv_copyout_strings = linux_copyout_strings,
@ -759,11 +777,11 @@ struct sysentvec elf_linux_sysvec = {
.sv_fixlimit = NULL,
.sv_maxssiz = NULL,
.sv_flags = SV_ABI_LINUX | SV_LP64 | SV_SHP | SV_SIG_DISCIGN |
SV_SIG_WAITNDQ,
SV_SIG_WAITNDQ | SV_TIMEKEEP,
.sv_set_syscall_retval = linux_set_syscall_retval,
.sv_fetch_syscall_args = linux_fetch_syscall_args,
.sv_syscallnames = NULL,
.sv_shared_page_base = SHAREDPAGE_LA48,
.sv_shared_page_base = LINUX_SHAREDPAGE_LA48,
.sv_shared_page_len = PAGE_SIZE,
.sv_schedtail = linux_schedtail,
.sv_thread_detach = linux_thread_detach,
@ -777,47 +795,130 @@ struct sysentvec elf_linux_sysvec = {
static int
linux_on_exec_vmspace(struct proc *p, struct image_params *imgp)
{
int error;
linux_on_exec(p, imgp);
return (0);
error = linux_map_vdso(p, linux_vdso_obj, linux_vdso_base,
LINUX_VDSOPAGE_SIZE, imgp);
if (error == 0)
linux_on_exec(p, imgp);
return (error);
}
/*
 * Late sysvec initialisation for the Linux x86-64 ABI.
 *
 * param is the struct sysentvec to initialise.  After the generic
 * exec_sysvec_init() has run, two variables that live inside the vDSO
 * image are patched in the kernel-side copy (linux_vdso_mapping):
 *
 *  - kern_timekeep_base receives sv->sv_timekeep_base;
 *  - kern_tsc_selector receives linux_vdso_tsc_selector_idx().
 *
 * kern_timekeep_base / kern_tsc_selector, as seen by the kernel, hold the
 * user-space addresses of those variables; subtracting linux_vdso_base
 * turns them into offsets within the image.
 *
 * The vDSO declares kern_tsc_selector as a uint32_t, so it is written
 * through a uint32_t pointer; a pointer-sized store would spill past the
 * variable.
 */
static void
linux_exec_sysvec_init(void *param)
{
	l_uintptr_t *ktimekeep_base;
	uint32_t *ktsc_selector;
	struct sysentvec *sv;
	ptrdiff_t tkoff;

	sv = param;
	amd64_lower_shared_page(sv);
	/* Fill timekeep_base */
	exec_sysvec_init(sv);

	tkoff = kern_timekeep_base - linux_vdso_base;
	ktimekeep_base = (l_uintptr_t *)(linux_vdso_mapping + tkoff);
	*ktimekeep_base = sv->sv_timekeep_base;

	tkoff = kern_tsc_selector - linux_vdso_base;
	ktsc_selector = (uint32_t *)(linux_vdso_mapping + tkoff);
	*ktsc_selector = linux_vdso_tsc_selector_idx();
	if (bootverbose)
		printf("Linux x86-64 vDSO tsc_selector: %u\n", *ktsc_selector);
}
SYSINIT(elf_linux_exec_sysvec_init, SI_SUB_EXEC, SI_ORDER_ANY,
linux_exec_sysvec_init, &elf_linux_sysvec);
static void
linux_vdso_install(void *param)
{
char *vdso_start = &_binary_linux_vdso_so_o_start;
char *vdso_end = &_binary_linux_vdso_so_o_end;
amd64_lower_shared_page(&elf_linux_sysvec);
linux_szsigcode = vdso_end - vdso_start;
MPASS(linux_szsigcode <= LINUX_VDSOPAGE_SIZE);
linux_szsigcode = (&_binary_linux_locore_o_end -
&_binary_linux_locore_o_start);
linux_vdso_base = LINUX_VDSOPAGE_LA48;
if (hw_lower_amd64_sharedpage != 0)
linux_vdso_base -= PAGE_SIZE;
if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
panic("Linux invalid vdso size\n");
__elfN(linux_vdso_fixup)(vdso_start, linux_vdso_base);
__elfN(linux_vdso_fixup)(&elf_linux_sysvec);
linux_vdso_obj = __elfN(linux_shared_page_init)
(&linux_vdso_mapping, LINUX_VDSOPAGE_SIZE);
bcopy(vdso_start, linux_vdso_mapping, linux_szsigcode);
linux_shared_page_obj = __elfN(linux_shared_page_init)
(&linux_shared_page_mapping);
__elfN(linux_vdso_reloc)(&elf_linux_sysvec);
bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping,
linux_szsigcode);
elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;
linux_vdso_reloc(linux_vdso_mapping, linux_vdso_base);
}
SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_FIRST,
linux_vdso_install, NULL);
static void
linux_vdso_deinstall(void *param)
{
__elfN(linux_shared_page_fini)(linux_shared_page_obj,
linux_shared_page_mapping);
__elfN(linux_shared_page_fini)(linux_vdso_obj,
linux_vdso_mapping, LINUX_VDSOPAGE_SIZE);
}
SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
linux_vdso_deinstall, NULL);
/*
 * Apply the vDSO's relocations to the kernel-side copy of the image so
 * that it is correct for its user-space load address.
 *
 * mapping - kernel address of the writable copy of the vDSO ELF image;
 *           the section headers and every relocation target are located
 *           relative to it.
 * offset  - base address used as the "B" term of R_X86_64_RELATIVE
 *           (B + A); must be non-zero.
 *
 * Only R_X86_64_NONE and R_X86_64_RELATIVE are acted upon; any other
 * type, and any SHT_REL section, is reported with printf() and skipped.
 * If the image carries several SHT_RELA sections only the last one found
 * is processed.
 */
static void
linux_vdso_reloc(char *mapping, Elf_Addr offset)
{
	const Elf_Ehdr *ehdr;
	const Elf_Shdr *shdr;
	const Elf_Rela *rela;
	Elf64_Addr *where, val;
	Elf_Size rtype, symidx;
	int relacnt;
	int i, j;

	MPASS(offset != 0);

	rela = NULL;
	relacnt = 0;
	ehdr = (const Elf_Ehdr *)mapping;
	shdr = (const Elf_Shdr *)(mapping + ehdr->e_shoff);
	for (i = 0; i < ehdr->e_shnum; i++) {
		switch (shdr[i].sh_type) {
		case SHT_REL:
			printf("Linux x86_64 vDSO: unexpected Rel section\n");
			break;
		case SHT_RELA:
			rela = (const Elf_Rela *)(mapping + shdr[i].sh_offset);
			relacnt = shdr[i].sh_size / sizeof(*rela);
			break;
		default:
			break;
		}
	}

	for (j = 0; j < relacnt; j++, rela++) {
		where = (Elf64_Addr *)(mapping + rela->r_offset);
		rtype = ELF_R_TYPE(rela->r_info);
		symidx = ELF_R_SYM(rela->r_info);

		switch (rtype) {
		case R_X86_64_NONE:	/* none */
			break;

		case R_X86_64_RELATIVE:	/* B + A */
			val = (Elf64_Addr)(offset + rela->r_addend);
			/* Skip the store when the word is already correct. */
			if (*where != val)
				*where = val;
			break;

		case R_X86_64_IRELATIVE:
			/* rtype/symidx are unsigned: print them as such. */
			printf("Linux x86_64 vDSO: unexpected ifunc relocation, "
			    "symbol index %lu\n", symidx);
			break;

		default:
			printf("Linux x86_64 vDSO: unexpected relocation type %lu, "
			    "symbol index %lu\n", rtype, symidx);
			break;
		}
	}
}
static char GNULINUX_ABI_VENDOR[] = "GNU";
static int GNULINUX_ABI_DESC = 0;

View File

@ -54,16 +54,20 @@ VERSION
{
LINUX_2.6 {
global:
time;
__vdso_time;
gettimeofday;
__vdso_gettimeofday;
getcpu;
__vdso_getcpu;
clock_gettime;
__vdso_clock_gettime;
__vdso_clock_getres;
local: *;
};
LINUX_0.0 {
global:
linux_rt_sigcode;
linux_platform;
kern_timekeep_base;
kern_tsc_selector;
local: *;
};
}

View File

@ -0,0 +1,146 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2021 Dmitry Chagin <dchagin@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/elf.h>
#include <sys/errno.h>
#include <sys/proc.h>
#include <sys/stddef.h>
#define _KERNEL
#include <sys/vdso.h>
#undef _KERNEL
#include <stdbool.h>
#include <strings.h>
#include <machine/atomic.h>
#include <machine/stdarg.h>
#include <amd64/linux/linux.h>
#include <amd64/linux/linux_syscall.h>
#include <compat/linux/linux_errno.h>
#include <compat/linux/linux_timer.h>
/* The kernel fixes these up at vDSO install time. */
uintptr_t *kern_timekeep_base = NULL;
uint32_t kern_tsc_selector = 0;
#include <x86/linux/linux_vdso_gettc_x86.inc>
/* for debug purpose */
/*
 * Raw write system call issued directly with the `syscall` instruction;
 * the preceding comment marks it as a debugging aid.
 *
 * Operands: the "a" register carries LINUX_SYS_write in and the result
 * out; "D", "S" and "d" pin fd, buf and size to the di, si and dx
 * registers.  rcx, r11, the flags and memory are declared clobbered.
 *
 * Returns the value left in the accumulator, as an int.
 */
static int
write(int fd, const void *buf, size_t size)
{
int res;
__asm__ __volatile__
(
"syscall"
: "=a"(res)
: "a"(LINUX_SYS_write), "D"(fd), "S"(buf), "d"(size)
: "cc", "rcx", "r11", "memory"
);
return (res);
}
/*
 * Kernel fallback for clock_gettime: executes `syscall` with
 * LINUX_SYS_linux_clock_gettime in the "a" register, clock_id in di ("D")
 * and ts in si ("S").  rcx, r11, the flags and memory are declared
 * clobbered.
 *
 * Returns the value left in the accumulator, as an int.
 */
static int
__vdso_clock_gettime_fallback(clockid_t clock_id, struct l_timespec *ts)
{
int res;
__asm__ __volatile__
(
"syscall"
: "=a"(res)
: "a"(LINUX_SYS_linux_clock_gettime), "D"(clock_id), "S"(ts)
: "cc", "rcx", "r11", "memory"
);
return (res);
}
/*
 * Kernel fallback for gettimeofday: executes `syscall` with
 * LINUX_SYS_gettimeofday in the "a" register, tv in di ("D") and tz in
 * si ("S").  rcx, r11, the flags and memory are declared clobbered.
 *
 * Returns the value left in the accumulator, as an int.
 */
static int
__vdso_gettimeofday_fallback(l_timeval *tv, struct timezone *tz)
{
int res;
__asm__ __volatile__
(
"syscall"
: "=a"(res)
: "a"(LINUX_SYS_gettimeofday), "D"(tv), "S"(tz)
: "cc", "rcx", "r11", "memory"
);
return (res);
}
/*
 * Kernel fallback for clock_getres: executes `syscall` with
 * LINUX_SYS_linux_clock_getres in the "a" register, clock_id in di ("D")
 * and ts in si ("S").  rcx, r11, the flags and memory are declared
 * clobbered.
 *
 * Returns the value left in the accumulator, as an int.
 */
static int
__vdso_clock_getres_fallback(clockid_t clock_id, struct l_timespec *ts)
{
int res;
__asm__ __volatile__
(
"syscall"
: "=a"(res)
: "a"(LINUX_SYS_linux_clock_getres), "D"(clock_id), "S"(ts)
: "cc", "rcx", "r11", "memory"
);
return (res);
}
/*
 * Kernel fallback for getcpu: executes `syscall` with
 * LINUX_SYS_linux_getcpu in the "a" register, cpu in di ("D"), node in
 * si ("S") and cache in dx ("d").  rcx, r11, the flags and memory are
 * declared clobbered.
 *
 * Returns the value left in the accumulator, as an int.
 */
static int
__vdso_getcpu_fallback(uint32_t *cpu, uint32_t *node, void *cache)
{
int res;
__asm__ __volatile__
(
"syscall"
: "=a"(res)
: "a"(LINUX_SYS_linux_getcpu), "D"(cpu), "S"(node), "d"(cache)
: "cc", "rcx", "r11", "memory"
);
return (res);
}
/*
 * Kernel fallback for time: executes `syscall` with LINUX_SYS_linux_time
 * in the "a" register and tm in di ("D").  rcx, r11, the flags and memory
 * are declared clobbered.
 *
 * Returns the value left in the accumulator, as an int.
 *
 * NOTE(review): the result is narrowed to int while tm points to a long;
 * if the syscall returns the time value itself, confirm that the
 * truncation is acceptable to the caller.
 */
static int
__vdso_time_fallback(long *tm)
{
int res;
__asm__ __volatile__
(
"syscall"
: "=a"(res)
: "a"(LINUX_SYS_linux_time), "D"(tm)
: "cc", "rcx", "r11", "memory"
);
return (res);
}
#include <compat/linux/linux_vdso_gtod.inc>

View File

@ -18,7 +18,7 @@ linux_platform:
* To avoid excess stack frame the signal trampoline code emulates
* the 'call' instruction.
*/
ENTRY(linux32_sigcode)
ENTRY(__kernel_sigreturn)
movl %esp, %ebx /* preserve sigframe */
call .getip0
.getip0:
@ -33,7 +33,7 @@ ENTRY(linux32_sigcode)
.endsigcode:
0: jmp 0b
ENTRY(linux32_rt_sigcode)
ENTRY(__kernel_rt_sigreturn)
leal LINUX_RT_SIGF_UC(%esp),%ebx /* linux ucp */
leal LINUX_RT_SIGF_SC(%ebx),%ecx /* linux sigcontext */
movl %esp, %edi
@ -49,7 +49,7 @@ ENTRY(linux32_rt_sigcode)
.endrtsigcode:
0: jmp 0b
ENTRY(linux32_vsyscall)
ENTRY(__kernel_vsyscall)
.startvsyscall:
int $0x80
ret

View File

@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/stddef.h>
#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
@ -78,6 +79,7 @@ __FBSDID("$FreeBSD$");
#include <machine/specialreg.h>
#include <machine/trap.h>
#include <x86/linux/linux_x86.h>
#include <amd64/linux32/linux.h>
#include <amd64/linux32/linux32_proto.h>
#include <compat/linux/linux_emul.h>
@ -91,14 +93,21 @@ __FBSDID("$FreeBSD$");
MODULE_VERSION(linux, 1);
#define LINUX32_MAXUSER ((1ul << 32) - PAGE_SIZE)
#define LINUX32_SHAREDPAGE (LINUX32_MAXUSER - PAGE_SIZE)
/*
 * Top of the 32-bit Linux address space, from LINUX32_MAXUSER down: the
 * two-page vDSO, one page reserved for the native shared page, then the
 * user stack.
 *
 * LINUX32_VDSOPAGE_SIZE is parenthesised so that it expands safely inside
 * any expression (division, modulo, multiplication, ...).
 */
#define	LINUX32_VDSOPAGE_SIZE	(PAGE_SIZE * 2)
#define	LINUX32_VDSOPAGE	(LINUX32_MAXUSER - LINUX32_VDSOPAGE_SIZE)
/* PAGE_SIZE here is the size of the native SHAREDPAGE. */
#define	LINUX32_SHAREDPAGE	(LINUX32_VDSOPAGE - PAGE_SIZE)
#define	LINUX32_USRSTACK	LINUX32_SHAREDPAGE
static int linux_szsigcode;
static vm_object_t linux_shared_page_obj;
static char *linux_shared_page_mapping;
extern char _binary_linux32_locore_o_start;
extern char _binary_linux32_locore_o_end;
static vm_object_t linux_vdso_obj;
static char *linux_vdso_mapping;
extern char _binary_linux32_vdso_so_o_start;
extern char _binary_linux32_vdso_so_o_end;
static vm_offset_t linux_vdso_base;
extern struct sysent linux32_sysent[LINUX32_SYS_MAXSYSCALL];
@ -111,12 +120,14 @@ static int linux_copyout_strings(struct image_params *imgp,
static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
static void linux_exec_setregs(struct thread *td,
struct image_params *imgp, uintptr_t stack);
static void linux_exec_sysvec_init(void *param);
static int linux_on_exec_vmspace(struct proc *p,
struct image_params *imgp);
static void linux32_fixlimit(struct rlimit *rl, int which);
static bool linux32_trans_osrel(const Elf_Note *note, int32_t *osrel);
static void linux_vdso_install(void *param);
static void linux_vdso_deinstall(void *param);
static void linux_vdso_reloc(char *mapping, Elf_Addr offset);
static void linux32_set_syscall_retval(struct thread *td, int error);
#define LINUX_T_UNKNOWN 255
@ -167,9 +178,11 @@ struct linux32_ps_strings {
#define LINUX32_PS_STRINGS (LINUX32_USRSTACK - \
sizeof(struct linux32_ps_strings))
LINUX_VDSO_SYM_INTPTR(linux32_sigcode);
LINUX_VDSO_SYM_INTPTR(linux32_rt_sigcode);
LINUX_VDSO_SYM_INTPTR(linux32_vsyscall);
LINUX_VDSO_SYM_INTPTR(__kernel_vsyscall);
LINUX_VDSO_SYM_INTPTR(__kernel_sigreturn);
LINUX_VDSO_SYM_INTPTR(__kernel_rt_sigreturn);
LINUX_VDSO_SYM_INTPTR(kern_timekeep_base);
LINUX_VDSO_SYM_INTPTR(kern_tsc_selector);
LINUX_VDSO_SYM_CHAR(linux_platform);
/*
@ -206,9 +219,8 @@ linux_copyout_auxargs(struct image_params *imgp, uintptr_t base)
M_WAITOK | M_ZERO);
issetugid = imgp->proc->p_flag & P_SUGID ? 1 : 0;
AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, linux32_vsyscall);
AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR,
imgp->proc->p_sysent->sv_shared_page_base);
AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, __kernel_vsyscall);
AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, linux_vdso_base);
AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
@ -354,7 +366,7 @@ linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
/* Build context to run handler in. */
regs->tf_rsp = PTROUT(fp);
regs->tf_rip = linux32_rt_sigcode;
regs->tf_rip = __kernel_rt_sigreturn;
regs->tf_rflags &= ~(PSL_T | PSL_D);
regs->tf_cs = _ucode32sel;
regs->tf_ss = _udatasel;
@ -460,7 +472,7 @@ linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
/* Build context to run handler in. */
regs->tf_rsp = PTROUT(fp);
regs->tf_rip = linux32_sigcode;
regs->tf_rip = __kernel_sigreturn;
regs->tf_rflags &= ~(PSL_T | PSL_D);
regs->tf_cs = _ucode32sel;
regs->tf_ss = _udatasel;
@ -911,7 +923,7 @@ struct sysentvec elf_linux_sysvec = {
.sv_transtrap = linux_translate_traps,
.sv_fixup = linux_fixup_elf,
.sv_sendsig = linux_sendsig,
.sv_sigcode = &_binary_linux32_locore_o_start,
.sv_sigcode = &_binary_linux32_vdso_so_o_start,
.sv_szsigcode = &linux_szsigcode,
.sv_name = "Linux ELF32",
.sv_coredump = elf32_coredump,
@ -931,7 +943,7 @@ struct sysentvec elf_linux_sysvec = {
.sv_fixlimit = linux32_fixlimit,
.sv_maxssiz = &linux32_maxssiz,
.sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP |
SV_SIG_DISCIGN | SV_SIG_WAITNDQ,
SV_SIG_DISCIGN | SV_SIG_WAITNDQ | SV_TIMEKEEP,
.sv_set_syscall_retval = linux32_set_syscall_retval,
.sv_fetch_syscall_args = linux32_fetch_syscall_args,
.sv_syscallnames = NULL,
@ -949,45 +961,127 @@ struct sysentvec elf_linux_sysvec = {
static int
linux_on_exec_vmspace(struct proc *p, struct image_params *imgp)
{
int error;
linux_on_exec(p, imgp);
return (0);
error = linux_map_vdso(p, linux_vdso_obj, linux_vdso_base,
LINUX32_VDSOPAGE_SIZE, imgp);
if (error == 0)
linux_on_exec(p, imgp);
return (error);
}
static void
linux_exec_sysvec_init(void *param)
{
l_uintptr_t *ktimekeep_base, *ktsc_selector;
struct sysentvec *sv;
ptrdiff_t tkoff;
sv = param;
/* Fill timekeep_base */
exec_sysvec_init(sv);
tkoff = kern_timekeep_base - linux_vdso_base;
ktimekeep_base = (l_uintptr_t *)(linux_vdso_mapping + tkoff);
*ktimekeep_base = sv->sv_timekeep_base;
tkoff = kern_tsc_selector - linux_vdso_base;
ktsc_selector = (l_uintptr_t *)(linux_vdso_mapping + tkoff);
*ktsc_selector = linux_vdso_tsc_selector_idx();
if (bootverbose)
printf("Linux i386 vDSO tsc_selector: %u\n", *ktsc_selector);
}
SYSINIT(elf_linux_exec_sysvec_init, SI_SUB_EXEC, SI_ORDER_ANY,
linux_exec_sysvec_init, &elf_linux_sysvec);
static void
linux_vdso_install(void *param)
{
char *vdso_start = &_binary_linux32_vdso_so_o_start;
char *vdso_end = &_binary_linux32_vdso_so_o_end;
linux_szsigcode = (&_binary_linux32_locore_o_end -
&_binary_linux32_locore_o_start);
linux_szsigcode = vdso_end - vdso_start;
MPASS(linux_szsigcode <= LINUX32_VDSOPAGE_SIZE);
if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
panic("Linux invalid vdso size\n");
linux_vdso_base = LINUX32_VDSOPAGE;
__elfN(linux_vdso_fixup)(&elf_linux_sysvec);
__elfN(linux_vdso_fixup)(vdso_start, linux_vdso_base);
linux_shared_page_obj = __elfN(linux_shared_page_init)
(&linux_shared_page_mapping);
linux_vdso_obj = __elfN(linux_shared_page_init)
(&linux_vdso_mapping, LINUX32_VDSOPAGE_SIZE);
bcopy(vdso_start, linux_vdso_mapping, linux_szsigcode);
__elfN(linux_vdso_reloc)(&elf_linux_sysvec);
bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping,
linux_szsigcode);
elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;
linux_vdso_reloc(linux_vdso_mapping, linux_vdso_base);
}
SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_FIRST,
linux_vdso_install, NULL);
static void
linux_vdso_deinstall(void *param)
{
__elfN(linux_shared_page_fini)(linux_shared_page_obj,
linux_shared_page_mapping);
__elfN(linux_shared_page_fini)(linux_vdso_obj,
linux_vdso_mapping, LINUX32_VDSOPAGE_SIZE);
}
SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
linux_vdso_deinstall, NULL);
/*
 * Apply the 32-bit vDSO's relocations to the kernel-side copy of the
 * image so that it is correct for its user-space load address.
 *
 * mapping - kernel address of the writable copy of the vDSO ELF image;
 *           the section headers and every relocation target are located
 *           relative to it.
 * offset  - base address used as the "B" term of R_386_RELATIVE (B + A);
 *           must be non-zero.
 *
 * The image uses Rel (not Rela) entries, so the addend is the value
 * already stored at the relocation target.  Only R_386_NONE and
 * R_386_RELATIVE are acted upon; any other type, and any SHT_RELA
 * section, is reported with printf() and skipped.  If the image carries
 * several SHT_REL sections only the last one found is processed.
 */
static void
linux_vdso_reloc(char *mapping, Elf_Addr offset)
{
	const Elf_Shdr *shdr;
	const Elf_Rel *rel;
	const Elf_Ehdr *ehdr;
	Elf32_Addr *where;
	Elf_Size rtype, symidx;
	Elf32_Addr addr, addend;
	int i, relcnt;

	MPASS(offset != 0);

	rel = NULL;
	relcnt = 0;
	ehdr = (const Elf_Ehdr *)mapping;
	shdr = (const Elf_Shdr *)(mapping + ehdr->e_shoff);
	for (i = 0; i < ehdr->e_shnum; i++) {
		switch (shdr[i].sh_type) {
		case SHT_REL:
			rel = (const Elf_Rel *)(mapping + shdr[i].sh_offset);
			relcnt = shdr[i].sh_size / sizeof(*rel);
			break;
		case SHT_RELA:
			printf("Linux i386 vDSO: unexpected Rela section\n");
			break;
		default:
			break;
		}
	}

	for (i = 0; i < relcnt; i++, rel++) {
		where = (Elf32_Addr *)(mapping + rel->r_offset);
		addend = *where;
		rtype = ELF_R_TYPE(rel->r_info);
		symidx = ELF_R_SYM(rel->r_info);

		switch (rtype) {
		case R_386_NONE:	/* none */
			break;

		case R_386_RELATIVE:	/* B + A */
			addr = (Elf32_Addr)PTROUT(offset + addend);
			/* Skip the store when the word is already correct. */
			if (*where != addr)
				*where = addr;
			break;

		case R_386_IRELATIVE:
			/* The arguments are cast to intmax_t, hence %jd. */
			printf("Linux i386 vDSO: unexpected ifunc relocation, "
			    "symbol index %jd\n", (intmax_t)symidx);
			break;

		default:
			printf("Linux i386 vDSO: unexpected relocation type %jd, "
			    "symbol index %jd\n", (intmax_t)rtype, (intmax_t)symidx);
			break;
		}
	}
}
static char GNU_ABI_VENDOR[] = "GNU";
static int GNULINUX_ABI_DESC = 0;

View File

@ -51,16 +51,30 @@ PHDRS
eh_frame_hdr PT_GNU_EH_FRAME;
}
ENTRY(linux32_vsyscall);
VERSION
{
LINUX_2.6 {
global:
__vdso_clock_gettime;
__vdso_gettimeofday;
__vdso_time;
__vdso_clock_getres;
__vdso_clock_gettime64;
};
LINUX_2.5 {
global:
linux32_vsyscall;
linux32_sigcode;
linux32_rt_sigcode;
__kernel_vsyscall;
__kernel_sigreturn;
__kernel_rt_sigreturn;
local: *;
};
LINUX_0.0 {
global:
linux_platform;
kern_timekeep_base;
kern_tsc_selector;
local: *;
};
}

View File

@ -0,0 +1,146 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2021 Dmitry Chagin <dchagin@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/elf.h>
#include <sys/errno.h>
#include <sys/proc.h>
#include <sys/stddef.h>
#define _KERNEL
#include <sys/vdso.h>
#undef _KERNEL
#include <stdbool.h>
#include <strings.h>
#include <machine/atomic.h>
#include <machine/stdarg.h>
#include <amd64/linux32/linux.h>
#include <amd64/linux32/linux32_syscall.h>
#include <compat/linux/linux_errno.h>
#include <compat/linux/linux_timer.h>
/* The kernel fixes these up at vDSO install time. */
uintptr_t *kern_timekeep_base = NULL;
uint32_t kern_tsc_selector = 0;
#include <x86/linux/linux_vdso_gettc_x86.inc>
/*
 * Raw write system call issued directly with `int $0x80`.
 *
 * Operands: the "a" register carries LINUX32_SYS_write in and the result
 * out; "b", "c" and "d" pin fd, buf and size to the bx, cx and dx
 * registers.  The flags and memory are declared clobbered.
 *
 * Returns the value left in the accumulator, as an int.
 */
static int
write(int fd, const void *buf, size_t size)
{
int res;
__asm__ __volatile__
(
"int $0x80"
: "=a"(res)
: "a"(LINUX32_SYS_write), "b"(fd), "c"(buf), "d"(size)
: "cc", "memory"
);
return (res);
}
/*
 * Kernel fallback for clock_gettime: executes `int $0x80` with
 * LINUX32_SYS_linux_clock_gettime in the "a" register, clock_id in bx
 * ("b") and ts in cx ("c").  The flags and memory are declared clobbered.
 *
 * Returns the value left in the accumulator, as an int.
 */
static int
__vdso_clock_gettime_fallback(clockid_t clock_id, struct l_timespec *ts)
{
int res;
__asm__ __volatile__
(
"int $0x80"
: "=a"(res)
: "a"(LINUX32_SYS_linux_clock_gettime), "b"(clock_id), "c"(ts)
: "cc", "memory"
);
return (res);
}
/*
 * Kernel fallback for clock_gettime64 (struct l_timespec64 result):
 * executes `int $0x80` with LINUX32_SYS_linux_clock_gettime64 in the "a"
 * register, clock_id in bx ("b") and ts in cx ("c").  The flags and
 * memory are declared clobbered.
 *
 * Returns the value left in the accumulator, as an int.
 */
static int
__vdso_clock_gettime64_fallback(clockid_t clock_id, struct l_timespec64 *ts)
{
int res;
__asm__ __volatile__
(
"int $0x80"
: "=a"(res)
: "a"(LINUX32_SYS_linux_clock_gettime64), "b"(clock_id), "c"(ts)
: "cc", "memory"
);
return (res);
}
/*
 * Kernel fallback for gettimeofday: executes `int $0x80` with
 * LINUX32_SYS_linux_gettimeofday in the "a" register, tv in bx ("b") and
 * tz in cx ("c").  The flags and memory are declared clobbered.
 *
 * Returns the value left in the accumulator, as an int.
 */
static int
__vdso_gettimeofday_fallback(l_timeval *tv, struct timezone *tz)
{
int res;
__asm__ __volatile__
(
"int $0x80"
: "=a"(res)
: "a"(LINUX32_SYS_linux_gettimeofday), "b"(tv), "c"(tz)
: "cc", "memory"
);
return (res);
}
/*
 * Kernel fallback for clock_getres: executes `int $0x80` with
 * LINUX32_SYS_linux_clock_getres in the "a" register, clock_id in bx
 * ("b") and ts in cx ("c").  The flags and memory are declared clobbered.
 *
 * Returns the value left in the accumulator, as an int.
 */
static int
__vdso_clock_getres_fallback(clockid_t clock_id, struct l_timespec *ts)
{
int res;
__asm__ __volatile__
(
"int $0x80"
: "=a"(res)
: "a"(LINUX32_SYS_linux_clock_getres), "b"(clock_id), "c"(ts)
: "cc", "memory"
);
return (res);
}
/*
 * Kernel fallback for time: executes `int $0x80` with
 * LINUX32_SYS_linux_time in the "a" register and tm in bx ("b").  The
 * flags and memory are declared clobbered.
 *
 * Returns the value left in the accumulator, as an int.
 */
static int
__vdso_time_fallback(long *tm)
{
int res;
__asm__ __volatile__
(
"int $0x80"
: "=a"(res)
: "a"(LINUX32_SYS_linux_time), "b"(tm)
: "cc", "memory"
);
return (res);
}
#include <compat/linux/linux_vdso_gtod.inc>

View File

@ -41,10 +41,17 @@ __FBSDID("$FreeBSD$");
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/stddef.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_param.h>
#include <arm64/linux/linux.h>
@ -65,11 +72,24 @@ __FBSDID("$FreeBSD$");
MODULE_VERSION(linux64elf, 1);
/*
 * Layout of the top of the Linux process address space, from
 * VM_MAXUSER_ADDRESS down: the two-page vDSO, one page reserved for the
 * native shared page, then the user stack with ps_strings at its top.
 *
 * LINUX_VDSOPAGE_SIZE is parenthesised so that it expands safely inside
 * any expression (division, modulo, multiplication, ...).
 */
#define	LINUX_VDSOPAGE_SIZE	(PAGE_SIZE * 2)
#define	LINUX_VDSOPAGE		(VM_MAXUSER_ADDRESS - \
				    LINUX_VDSOPAGE_SIZE)
/* PAGE_SIZE here is the size of the native SHAREDPAGE. */
#define	LINUX_SHAREDPAGE	(LINUX_VDSOPAGE - PAGE_SIZE)
#define	LINUX_USRSTACK		LINUX_SHAREDPAGE
#define	LINUX_PS_STRINGS	(LINUX_USRSTACK - \
				    sizeof(struct ps_strings))
static int linux_szsigcode;
static vm_object_t linux_shared_page_obj;
static char *linux_shared_page_mapping;
extern char _binary_linux_locore_o_start;
extern char _binary_linux_locore_o_end;
static vm_object_t linux_vdso_obj;
static char *linux_vdso_mapping;
extern char _binary_linux_vdso_so_o_start;
extern char _binary_linux_vdso_so_o_end;
static vm_offset_t linux_vdso_base;
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
@ -82,10 +102,12 @@ static int linux_elf_fixup(uintptr_t *stack_base,
static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
static void linux_vdso_install(const void *param);
static void linux_vdso_deinstall(const void *param);
static void linux_vdso_reloc(char *mapping, Elf_Addr offset);
static void linux_set_syscall_retval(struct thread *td, int error);
static int linux_fetch_syscall_args(struct thread *td);
static void linux_exec_setregs(struct thread *td, struct image_params *imgp,
uintptr_t stack);
static void linux_exec_sysvec_init(void *param);
static int linux_on_exec_vmspace(struct proc *p,
struct image_params *imgp);
@ -102,6 +124,10 @@ LIN_SDT_PROBE_DEFINE0(sysvec, linux_rt_sendsig, todo);
LIN_SDT_PROBE_DEFINE0(sysvec, linux_vdso_install, todo);
LIN_SDT_PROBE_DEFINE0(sysvec, linux_vdso_deinstall, todo);
LINUX_VDSO_SYM_CHAR(linux_platform);
LINUX_VDSO_SYM_INTPTR(kern_timekeep_base);
LINUX_VDSO_SYM_INTPTR(__kernel_rt_sigreturn);
/* LINUXTODO: do we have traps to translate? */
static int
linux_translate_traps(int signal, int trap_code)
@ -111,8 +137,6 @@ linux_translate_traps(int signal, int trap_code)
return (signal);
}
LINUX_VDSO_SYM_CHAR(linux_platform);
static int
linux_fetch_syscall_args(struct thread *td)
{
@ -169,8 +193,7 @@ linux_copyout_auxargs(struct image_params *imgp, uintptr_t base)
M_WAITOK | M_ZERO);
issetugid = p->p_flag & P_SUGID ? 1 : 0;
AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR,
imgp->proc->p_sysent->sv_shared_page_base);
AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, linux_vdso_base);
AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, *imgp->sysent->sv_hwcap);
AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
@ -404,7 +427,7 @@ struct sysentvec elf_linux_sysvec = {
.sv_transtrap = linux_translate_traps,
.sv_fixup = linux_elf_fixup,
.sv_sendsig = linux_rt_sendsig,
.sv_sigcode = &_binary_linux_locore_o_start,
.sv_sigcode = &_binary_linux_vdso_so_o_start,
.sv_szsigcode = &linux_szsigcode,
.sv_name = "Linux ELF64",
.sv_coredump = elf64_coredump,
@ -415,8 +438,8 @@ struct sysentvec elf_linux_sysvec = {
.sv_minsigstksz = LINUX_MINSIGSTKSZ,
.sv_minuser = VM_MIN_ADDRESS,
.sv_maxuser = VM_MAXUSER_ADDRESS,
.sv_usrstack = USRSTACK,
.sv_psstrings = PS_STRINGS, /* XXX */
.sv_usrstack = LINUX_USRSTACK,
.sv_psstrings = LINUX_PS_STRINGS,
.sv_stackprot = VM_PROT_READ | VM_PROT_WRITE,
.sv_copyout_auxargs = linux_copyout_auxargs,
.sv_copyout_strings = linux_copyout_strings,
@ -424,11 +447,11 @@ struct sysentvec elf_linux_sysvec = {
.sv_fixlimit = NULL,
.sv_maxssiz = NULL,
.sv_flags = SV_ABI_LINUX | SV_LP64 | SV_SHP | SV_SIG_DISCIGN |
SV_SIG_WAITNDQ,
SV_SIG_WAITNDQ | SV_TIMEKEEP,
.sv_set_syscall_retval = linux_set_syscall_retval,
.sv_fetch_syscall_args = linux_fetch_syscall_args,
.sv_syscallnames = NULL,
.sv_shared_page_base = SHAREDPAGE,
.sv_shared_page_base = LINUX_SHAREDPAGE,
.sv_shared_page_len = PAGE_SIZE,
.sv_schedtail = linux_schedtail,
.sv_thread_detach = linux_thread_detach,
@ -444,46 +467,115 @@ struct sysentvec elf_linux_sysvec = {
static int
linux_on_exec_vmspace(struct proc *p, struct image_params *imgp)
{
int error;
linux_on_exec(p, imgp);
return (0);
error = linux_map_vdso(p, linux_vdso_obj, linux_vdso_base,
LINUX_VDSOPAGE_SIZE, imgp);
if (error == 0)
linux_on_exec(p, imgp);
return (error);
}
static void
linux_exec_sysvec_init(void *param)
{
l_uintptr_t *ktimekeep_base;
struct sysentvec *sv;
ptrdiff_t tkoff;
sv = param;
/* Fill timekeep_base */
exec_sysvec_init(sv);
tkoff = kern_timekeep_base - linux_vdso_base;
ktimekeep_base = (l_uintptr_t *)(linux_vdso_mapping + tkoff);
*ktimekeep_base = sv->sv_timekeep_base;
}
SYSINIT(elf_linux_exec_sysvec_init, SI_SUB_EXEC, SI_ORDER_ANY,
linux_exec_sysvec_init, &elf_linux_sysvec);
static void
linux_vdso_install(const void *param)
{
char *vdso_start = &_binary_linux_vdso_so_o_start;
char *vdso_end = &_binary_linux_vdso_so_o_end;
linux_szsigcode = (&_binary_linux_locore_o_end -
&_binary_linux_locore_o_start);
linux_szsigcode = vdso_end - vdso_start;
MPASS(linux_szsigcode <= LINUX_VDSOPAGE_SIZE);
if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
panic("invalid Linux VDSO size\n");
linux_vdso_base = LINUX_VDSOPAGE;
__elfN(linux_vdso_fixup)(&elf_linux_sysvec);
__elfN(linux_vdso_fixup)(vdso_start, linux_vdso_base);
linux_shared_page_obj = __elfN(linux_shared_page_init)
(&linux_shared_page_mapping);
linux_vdso_obj = __elfN(linux_shared_page_init)
(&linux_vdso_mapping, LINUX_VDSOPAGE_SIZE);
bcopy(vdso_start, linux_vdso_mapping, linux_szsigcode);
__elfN(linux_vdso_reloc)(&elf_linux_sysvec);
memcpy(linux_shared_page_mapping, elf_linux_sysvec.sv_sigcode,
linux_szsigcode);
elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;
linux_vdso_reloc(linux_vdso_mapping, linux_vdso_base);
}
SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_FIRST,
linux_vdso_install, NULL);
static void
linux_vdso_deinstall(const void *param)
{
LIN_SDT_PROBE0(sysvec, linux_vdso_deinstall, todo);
__elfN(linux_shared_page_fini)(linux_shared_page_obj,
linux_shared_page_mapping);
__elfN(linux_shared_page_fini)(linux_vdso_obj,
linux_vdso_mapping, LINUX_VDSOPAGE_SIZE);
}
SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
linux_vdso_deinstall, NULL);
/*
 * Apply the RELA relocations found in the vDSO image.
 *
 * mapping - kernel-side copy of the vDSO ELF image; patched in place.
 * offset  - base address the image is relocated against (the caller in
 *           this file passes linux_vdso_base); must be non-zero.
 *
 * Only R_AARCH64_NONE and R_AARCH64_RELATIVE are handled.  A REL section
 * or any other relocation type is reported with printf() and skipped.
 */
static void
linux_vdso_reloc(char *mapping, Elf_Addr offset)
{
	Elf_Size rtype, symidx;
	const Elf_Rela *rela;
	const Elf_Shdr *shdr;
	const Elf_Ehdr *ehdr;
	Elf_Addr *where;
	Elf_Addr addr, addend;
	int i, relacnt;

	MPASS(offset != 0);

	rela = NULL;
	relacnt = 0;
	ehdr = (const Elf_Ehdr *)mapping;
	shdr = (const Elf_Shdr *)(mapping + ehdr->e_shoff);
	/* Locate the relocation table; the last RELA section seen wins. */
	for (i = 0; i < ehdr->e_shnum; i++) {
		switch (shdr[i].sh_type) {
		case SHT_REL:
			printf("Linux Aarch64 vDSO: unexpected Rel section\n");
			break;
		case SHT_RELA:
			rela = (const Elf_Rela *)(mapping + shdr[i].sh_offset);
			relacnt = shdr[i].sh_size / sizeof(*rela);
			break;
		default:
			break;
		}
	}

	for (i = 0; i < relacnt; i++, rela++) {
		/* The word to patch lives in the kernel-side copy. */
		where = (Elf_Addr *)(mapping + rela->r_offset);
		addend = rela->r_addend;
		rtype = ELF_R_TYPE(rela->r_info);
		symidx = ELF_R_SYM(rela->r_info);

		switch (rtype) {
		case R_AARCH64_NONE:	/* none */
			break;

		case R_AARCH64_RELATIVE:	/* B + A */
			/*
			 * B must be the relocation base passed in by the
			 * caller, not the kernel alias used to write the
			 * image: the stored value is consumed through the
			 * mapping at 'offset'.
			 */
			addr = offset + addend;
			if (*where != addr)
				*where = addr;
			break;

		default:
			printf("Linux Aarch64 vDSO: unexpected relocation type %ld, "
			    "symbol index %ld\n", rtype, symidx);
			break;
		}
	}
}
static char GNU_ABI_VENDOR[] = "GNU";
static int GNU_ABI_LINUX = 0;

View File

@ -1,6 +1,6 @@
/*
* Stub arm64 vdso linker script.
* LINUXTODO: update along with VDSO implementation
* Linker script for 64-bit vDSO.
* Copied from Linux kernel arch/x86/vdso/vdso-layout.lds.S
*
* $FreeBSD$
*/
@ -8,15 +8,66 @@
SECTIONS
{
. = . + SIZEOF_HEADERS;
.text : { *(.text*) }
.rodata : { *(.rodata*) }
.hash : { *(.hash) }
.hash : { *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }
.dynsym : { *(.dynsym) }
.dynstr : { *(.dynstr) }
.gnu.version : { *(.gnu.version) }
.gnu.version_d : { *(.gnu.version_d) }
.gnu.version_r : { *(.gnu.version_r) }
.data : { *(.data*) }
.dynamic : { *(.dynamic) }
.note : { *(.note.*) } :text :note
.eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
.eh_frame : { KEEP (*(.eh_frame)) } :text
.dynamic : { *(.dynamic) } :text :dynamic
.rodata : { *(.rodata*) } :text
.data : {
*(.data*)
*(.sdata*)
*(.got.plt) *(.got)
*(.gnu.linkonce.d.*)
*(.bss*)
*(.dynbss*)
*(.gnu.linkonce.b.*)
}
.altinstructions : { *(.altinstructions) }
.altinstr_replacement : { *(.altinstr_replacement) }
. = ALIGN(0x100);
.text : { *(.test .text*) } :text =0x90909090
}
PHDRS
{
text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
note PT_NOTE FLAGS(4); /* PF_R */
eh_frame_hdr PT_GNU_EH_FRAME;
}
/*
* This controls what symbols we export from the DSO.
*/
VERSION
{
LINUX_2.6.39 {
global:
__kernel_rt_sigreturn;
__kernel_gettimeofday;
__kernel_clock_gettime;
__kernel_clock_getres;
local: *;
};
LINUX_0.0 {
global:
linux_platform;
kern_timekeep_base;
local: *;
};
}

View File

@ -0,0 +1,153 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2012 Konstantin Belousov <kib@FreeBSD.org>
* Copyright (c) 2021 Dmitry Chagin <dchagin@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/elf.h>
#include <sys/errno.h>
#include <sys/proc.h>
#include <sys/stddef.h>
#define _KERNEL
#include <sys/vdso.h>
#undef _KERNEL
#include <stdbool.h>
#include <strings.h>
#include <machine/atomic.h>
#include <machine/stdarg.h>
#include <arm64/linux/linux.h>
#include <arm64/linux/linux_syscall.h>
#include <compat/linux/linux_errno.h>
#include <compat/linux/linux_timer.h>
/* The kernel fixes these up at vDSO install. */
uintptr_t *kern_timekeep_base = NULL;
uint32_t kern_tsc_selector = 0;
/*
 * Issue the Linux write system call directly with "svc #0".
 *
 * The syscall number (LINUX_SYS_write) is placed in x8 and the three
 * arguments in x0-x2.  The value found in x0 after the trap is returned,
 * narrowed from long to int.
 */
static int
write(int lfd, const void *lbuf, size_t lsize)
{
/* Pin each operand to the register named in its asm() qualifier. */
register long svc asm("x8") = LINUX_SYS_write;
register int fd asm("x0") = lfd;
register const char *buf asm("x1") = lbuf;
register long size asm("x2") = lsize;
/* The result shares x0 with the first argument. */
register long res asm ("x0");
asm volatile(
" svc #0\n"
: "=r" (res)
: "r" (fd), "r" (buf), "r" (size), "r" (svc)
: "memory");
return (res);
}
/*
 * Fallback for __vdso_clock_gettime(): trap with "svc #0" using syscall
 * number LINUX_SYS_linux_clock_gettime in x8, the clock id in x0 and the
 * l_timespec pointer in x1.  Returns the value left in x0, narrowed to int.
 */
static int
__vdso_clock_gettime_fallback(clockid_t clock_id, struct l_timespec *lts)
{
/* Pin each operand to the register named in its asm() qualifier. */
register long svc asm("x8") = LINUX_SYS_linux_clock_gettime;
register clockid_t clockid asm("x0") = clock_id;
register struct l_timespec *ts asm("x1") = lts;
/* The result shares x0 with the first argument. */
register long res asm ("x0");
asm volatile(
" svc #0\n"
: "=r" (res)
: "r" (clockid), "r" (ts), "r" (svc)
: "memory");
return (res);
}
/*
 * Fallback for __vdso_gettimeofday(): trap with "svc #0" using syscall
 * number LINUX_SYS_gettimeofday in x8, the l_timeval pointer in x0 and the
 * timezone pointer in x1.  Returns the value left in x0, narrowed to int.
 */
static int
__vdso_gettimeofday_fallback(l_timeval *ltv, struct timezone *ltz)
{
/* Pin each operand to the register named in its asm() qualifier. */
register long svc asm("x8") = LINUX_SYS_gettimeofday;
register l_timeval *tv asm("x0") = ltv;
register struct timezone *tz asm("x1") = ltz;
/* The result shares x0 with the first argument. */
register long res asm ("x0");
asm volatile(
" svc #0\n"
: "=r" (res)
: "r" (tv), "r" (tz), "r" (svc)
: "memory");
return (res);
}
/*
 * Fallback for __vdso_clock_getres(): trap with "svc #0" using syscall
 * number LINUX_SYS_linux_clock_getres in x8, the clock id in x0 and the
 * l_timespec pointer in x1.  Returns the value left in x0, narrowed to int.
 */
static int
__vdso_clock_getres_fallback(clockid_t clock_id, struct l_timespec *lts)
{
/* Pin each operand to the register named in its asm() qualifier. */
register long svc asm("x8") = LINUX_SYS_linux_clock_getres;
register clockid_t clockid asm("x0") = clock_id;
register struct l_timespec *ts asm("x1") = lts;
/* The result shares x0 with the first argument. */
register long res asm ("x0");
asm volatile(
" svc #0\n"
: "=r" (res)
: "r" (clockid), "r" (ts), "r" (svc)
: "memory");
return (res);
}
/*
* copied from lib/libc/aarch64/sys/__vdso_gettc.c
*/
/* Return the current value of the cntvct_el0 system register. */
static inline uint64_t
cp15_cntvct_get(void)
{
uint64_t reg;
__asm __volatile("mrs %0, cntvct_el0" : "=r" (reg));
return (reg);
}
/* Return the current value of the cntpct_el0 system register. */
static inline uint64_t
cp15_cntpct_get(void)
{
uint64_t reg;
__asm __volatile("mrs %0, cntpct_el0" : "=r" (reg));
return (reg);
}
/*
 * Read the timecounter described by 'th' into *tc.
 *
 * Returns ENOSYS when the timehands do not use the ARM generic timer
 * algorithm, 0 otherwise.  th_physical selects between the cntpct_el0
 * (non-zero) and cntvct_el0 (zero) counters; the 64-bit reading is
 * truncated to u_int on store.
 */
int
__vdso_gettc(const struct vdso_timehands *th, u_int *tc)
{
	uint64_t ticks;

	if (th->th_algo != VDSO_TH_ALGO_ARM_GENTIM)
		return (ENOSYS);

	/* Instruction barrier is issued before the counter is sampled. */
	__asm __volatile("isb" : : : "memory");
	if (th->th_physical == 0)
		ticks = cp15_cntvct_get();
	else
		ticks = cp15_cntpct_get();
	*tc = ticks;
	return (0);
}
#include <compat/linux/linux_vdso_gtod.inc>

View File

@ -38,17 +38,16 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/elf.h>
#include <sys/imgact.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/queue.h>
#include <sys/sysent.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
@ -59,12 +58,6 @@ __FBSDID("$FreeBSD$");
SLIST_HEAD(, linux_vdso_sym) __elfN(linux_vdso_syms) =
SLIST_HEAD_INITIALIZER(__elfN(linux_vdso_syms));
static int __elfN(symtabindex);
static int __elfN(symstrindex);
static void
__elfN(linux_vdso_lookup)(Elf_Ehdr *, struct linux_vdso_sym *);
void
__elfN(linux_vdso_sym_init)(struct linux_vdso_sym *s)
{
@ -73,176 +66,119 @@ __elfN(linux_vdso_sym_init)(struct linux_vdso_sym *s)
}
vm_object_t
__elfN(linux_shared_page_init)(char **mapping)
__elfN(linux_shared_page_init)(char **mapping, vm_size_t size)
{
vm_page_t m;
vm_object_t obj;
vm_offset_t addr;
size_t n, pages;
obj = vm_pager_allocate(OBJT_PHYS, 0, PAGE_SIZE,
pages = size / PAGE_SIZE;
addr = kva_alloc(size);
obj = vm_pager_allocate(OBJT_PHYS, 0, size,
VM_PROT_DEFAULT, 0, NULL);
VM_OBJECT_WLOCK(obj);
m = vm_page_grab(obj, 0, VM_ALLOC_ZERO);
for (n = 0; n < pages; n++) {
m = vm_page_grab(obj, n,
VM_ALLOC_ZERO);
vm_page_valid(m);
vm_page_xunbusy(m);
pmap_qenter(addr + n * PAGE_SIZE, &m, 1);
}
VM_OBJECT_WUNLOCK(obj);
vm_page_valid(m);
vm_page_xunbusy(m);
addr = kva_alloc(PAGE_SIZE);
pmap_qenter(addr, &m, 1);
*mapping = (char *)addr;
return (obj);
}
void
__elfN(linux_shared_page_fini)(vm_object_t obj, void *mapping)
__elfN(linux_shared_page_fini)(vm_object_t obj, void *mapping,
vm_size_t size)
{
vm_offset_t va;
va = (vm_offset_t)mapping;
pmap_qremove(va, 1);
kva_free(va, PAGE_SIZE);
pmap_qremove(va, size / PAGE_SIZE);
kva_free(va, size);
vm_object_deallocate(obj);
}
void
__elfN(linux_vdso_fixup)(struct sysentvec *sv)
__elfN(linux_vdso_fixup)(char *base, vm_offset_t offset)
{
struct linux_vdso_sym *lsym;
const Elf_Shdr *shdr;
Elf_Ehdr *ehdr;
Elf_Shdr *shdr;
int i;
Elf_Sym *dsym, *sym;
char *strtab, *symname;
int i, symcnt;
ehdr = (Elf_Ehdr *) sv->sv_sigcode;
ehdr = (Elf_Ehdr *)base;
if (!IS_ELF(*ehdr) ||
ehdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
ehdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
ehdr->e_ident[EI_VERSION] != EV_CURRENT ||
ehdr->e_shoff == 0 ||
ehdr->e_shentsize != sizeof(Elf_Shdr))
panic("Linux invalid vdso header.\n");
MPASS(IS_ELF(*ehdr));
MPASS(ehdr->e_ident[EI_CLASS] == ELF_TARG_CLASS);
MPASS(ehdr->e_ident[EI_DATA] == ELF_TARG_DATA);
MPASS(ehdr->e_ident[EI_VERSION] == EV_CURRENT);
MPASS(ehdr->e_shentsize == sizeof(Elf_Shdr));
MPASS(ehdr->e_shoff != 0);
MPASS(ehdr->e_type == ET_DYN);
if (ehdr->e_type != ET_DYN)
panic("Linux invalid vdso header.\n");
shdr = (const Elf_Shdr *)(base + ehdr->e_shoff);
shdr = (Elf_Shdr *) ((caddr_t)ehdr + ehdr->e_shoff);
__elfN(symtabindex) = -1;
__elfN(symstrindex) = -1;
dsym = NULL;
for (i = 0; i < ehdr->e_shnum; i++) {
if (shdr[i].sh_size == 0)
continue;
if (shdr[i].sh_type == SHT_DYNSYM) {
__elfN(symtabindex) = i;
__elfN(symstrindex) = shdr[i].sh_link;
}
}
if (__elfN(symtabindex) == -1 || __elfN(symstrindex) == -1)
panic("Linux invalid vdso header.\n");
ehdr->e_ident[EI_OSABI] = ELFOSABI_LINUX;
}
void
__elfN(linux_vdso_reloc)(struct sysentvec *sv)
{
struct linux_vdso_sym *lsym;
Elf_Ehdr *ehdr;
Elf_Phdr *phdr;
Elf_Shdr *shdr;
Elf_Dyn *dyn;
Elf_Sym *sym;
int i, j, symcnt;
ehdr = (Elf_Ehdr *) sv->sv_sigcode;
/* Adjust our so relative to the sigcode_base */
if (sv->sv_shared_page_base != 0) {
ehdr->e_entry += sv->sv_shared_page_base;
phdr = (Elf_Phdr *)((caddr_t)ehdr + ehdr->e_phoff);
/* phdrs */
for (i = 0; i < ehdr->e_phnum; i++) {
phdr[i].p_vaddr += sv->sv_shared_page_base;
if (phdr[i].p_type != PT_DYNAMIC)
continue;
dyn = (Elf_Dyn *)((caddr_t)ehdr + phdr[i].p_offset);
for(; dyn->d_tag != DT_NULL; dyn++) {
switch (dyn->d_tag) {
case DT_PLTGOT:
case DT_HASH:
case DT_STRTAB:
case DT_SYMTAB:
case DT_RELA:
case DT_INIT:
case DT_FINI:
case DT_REL:
case DT_DEBUG:
case DT_JMPREL:
case DT_VERSYM:
case DT_VERDEF:
case DT_VERNEED:
case DT_ADDRRNGLO ... DT_ADDRRNGHI:
dyn->d_un.d_ptr += sv->sv_shared_page_base;
break;
case DT_ENCODING ... DT_LOOS-1:
case DT_LOOS ... DT_HIOS:
if (dyn->d_tag >= DT_ENCODING &&
(dyn->d_tag & 1) == 0)
dyn->d_un.d_ptr += sv->sv_shared_page_base;
break;
default:
break;
}
}
}
/* sections */
shdr = (Elf_Shdr *)((caddr_t)ehdr + ehdr->e_shoff);
for(i = 0; i < ehdr->e_shnum; i++) {
if (!(shdr[i].sh_flags & SHF_ALLOC))
continue;
shdr[i].sh_addr += sv->sv_shared_page_base;
if (shdr[i].sh_type != SHT_SYMTAB &&
shdr[i].sh_type != SHT_DYNSYM)
continue;
sym = (Elf_Sym *)((caddr_t)ehdr + shdr[i].sh_offset);
symcnt = shdr[i].sh_size / sizeof(*sym);
for(j = 0; j < symcnt; j++, sym++) {
if (sym->st_shndx == SHN_UNDEF ||
sym->st_shndx == SHN_ABS)
continue;
sym->st_value += sv->sv_shared_page_base;
}
}
}
SLIST_FOREACH(lsym, &__elfN(linux_vdso_syms), sym)
__elfN(linux_vdso_lookup)(ehdr, lsym);
}
static void
__elfN(linux_vdso_lookup)(Elf_Ehdr *ehdr, struct linux_vdso_sym *vsym)
{
vm_offset_t strtab, symname;
uint32_t symcnt;
Elf_Shdr *shdr;
int i;
shdr = (Elf_Shdr *) ((caddr_t)ehdr + ehdr->e_shoff);
strtab = (vm_offset_t)((caddr_t)ehdr +
shdr[__elfN(symstrindex)].sh_offset);
Elf_Sym *sym = (Elf_Sym *)((caddr_t)ehdr +
shdr[__elfN(symtabindex)].sh_offset);
symcnt = shdr[__elfN(symtabindex)].sh_size / sizeof(*sym);
for (i = 0; i < symcnt; ++i, ++sym) {
symname = strtab + sym->st_name;
if (strncmp(vsym->symname, (char *)symname, vsym->size) == 0) {
*vsym->ptr = (uintptr_t)sym->st_value;
dsym = (Elf_Sym *)(base + shdr[i].sh_offset);
strtab = base + shdr[shdr[i].sh_link].sh_offset;
symcnt = shdr[i].sh_size / sizeof(*dsym);
break;
}
}
MPASS(dsym != NULL);
ehdr->e_ident[EI_OSABI] = ELFOSABI_LINUX;
/*
* VDSO is readonly mapped to the process VA and
* can't be relocated by rtld.
*/
SLIST_FOREACH(lsym, &__elfN(linux_vdso_syms), sym) {
for (i = 0, sym = dsym; i < symcnt; i++, sym++) {
symname = strtab + sym->st_name;
if (strncmp(lsym->symname, symname, lsym->size) == 0) {
sym->st_value += offset;
*lsym->ptr = sym->st_value;
break;
}
}
}
}
/*
 * Map the vDSO object read/execute at the fixed address 'base' in the
 * address space of process 'p'.
 *
 * Returns 0 on success, otherwise the errno that vm_mmap_to_errno()
 * derives from the vm_map_fixed() failure code.
 */
int
linux_map_vdso(struct proc *p, vm_object_t obj, vm_offset_t base,
    vm_offset_t size, struct image_params *imgp)
{
	vm_map_t map;
	int rv;

	MPASS((imgp->sysent->sv_flags & SV_ABI_MASK) == SV_ABI_LINUX);
	MPASS(obj != NULL);

	map = &p->p_vmspace->vm_map;

	/* Take a reference for the mapping; dropped again if it fails. */
	vm_object_reference(obj);
	rv = vm_map_fixed(map, obj, 0, base, size,
	    VM_PROT_READ | VM_PROT_EXECUTE,
	    VM_PROT_READ | VM_PROT_EXECUTE,
	    MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE);
	if (rv == KERN_SUCCESS)
		return (0);

	vm_object_deallocate(obj);
	return (vm_mmap_to_errno(rv));
}

View File

@ -38,12 +38,14 @@ struct linux_vdso_sym {
char symname[];
};
vm_object_t __elfN(linux_shared_page_init)(char **);
void __elfN(linux_shared_page_fini)(vm_object_t, void *);
void __elfN(linux_vdso_fixup)(struct sysentvec *);
void __elfN(linux_vdso_reloc)(struct sysentvec *);
vm_object_t __elfN(linux_shared_page_init)(char **, vm_size_t);
void __elfN(linux_shared_page_fini)(vm_object_t, void *, vm_size_t);
void __elfN(linux_vdso_fixup)(char *, vm_offset_t);
void __elfN(linux_vdso_sym_init)(struct linux_vdso_sym *);
int linux_map_vdso(struct proc *, vm_object_t, vm_offset_t,
vm_offset_t, struct image_params *);
#define LINUX_VDSO_SYM_INTPTR(name) \
uintptr_t name; \
LINUX_VDSO_SYM_DEFINE(name)

View File

@ -0,0 +1,337 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2012 Konstantin Belousov <kib@FreeBSD.org>
* Copyright (c) 2021 Dmitry Chagin <dchagin@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
 * Copy a native timespec into a Linux l_timespec.
 *
 * Returns 0 on success.  When built with COMPAT_LINUX32, returns
 * LINUX_EOVERFLOW (and leaves *lts untouched) if tv_sec does not fit
 * in the int range.
 */
static int
__vdso_native_to_linux_timespec(struct l_timespec *lts,
    struct timespec *nts)
{

#ifdef COMPAT_LINUX32
	if (nts->tv_sec < INT_MIN || nts->tv_sec > INT_MAX)
		return (LINUX_EOVERFLOW);
#endif
	lts->tv_nsec = nts->tv_nsec;
	lts->tv_sec = nts->tv_sec;
	return (0);
}
/*
 * Copy a native timeval into a Linux l_timeval.
 *
 * Returns 0 on success.  When built with COMPAT_LINUX32, returns
 * LINUX_EOVERFLOW (and leaves *ltv untouched) if tv_sec does not fit
 * in the int range.
 */
static int
__vdso_native_to_linux_timeval(l_timeval *ltv,
    struct timeval *ntv)
{

#ifdef COMPAT_LINUX32
	if (ntv->tv_sec < INT_MIN || ntv->tv_sec > INT_MAX)
		return (LINUX_EOVERFLOW);
#endif
	ltv->tv_usec = ntv->tv_usec;
	ltv->tv_sec = ntv->tv_sec;
	return (0);
}
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * Copy a native timespec into a Linux l_timespec64.  No range check is
 * performed here; always returns 0.
 */
static int
__vdso_native_to_linux_timespec64(struct l_timespec64 *lts,
struct timespec *nts)
{
lts->tv_sec = nts->tv_sec;
lts->tv_nsec = nts->tv_nsec;
return (0);
}
#endif
/*
 * Translate a Linux clock id 'l' into the native clock id stored in *n.
 *
 * Returns 0 on success.  For any clock id not listed below LINUX_EINVAL
 * is returned and *n is left unmodified.
 */
static int
__vdso_linux_to_native_clockid(clockid_t *n, clockid_t l)
{
	clockid_t native;

	switch (l) {
	case LINUX_CLOCK_REALTIME:
		native = CLOCK_REALTIME;
		break;
	case LINUX_CLOCK_MONOTONIC:
		native = CLOCK_MONOTONIC;
		break;
	case LINUX_CLOCK_REALTIME_COARSE:
		native = CLOCK_REALTIME_FAST;
		break;
	case LINUX_CLOCK_MONOTONIC_COARSE:
	case LINUX_CLOCK_MONOTONIC_RAW:
		native = CLOCK_MONOTONIC_FAST;
		break;
	case LINUX_CLOCK_BOOTTIME:
		native = CLOCK_UPTIME;
		break;
	default:
		return (LINUX_EINVAL);
	}

	*n = native;
	return (0);
}
/*
* The code below is adapted from
* lib/libc/sys/__vdso_gettimeofday.c
*/
/*
 * Store the vDSO's kern_timekeep_base, viewed as a struct vdso_timekeep
 * pointer, into *tk.  The result may be NULL; callers in this file check.
 */
static inline void
__vdso_gettimekeep(struct vdso_timekeep **tk)
{
*tk = (struct vdso_timekeep *)kern_timekeep_base;
}
/*
 * Compute how far the timecounter has advanced since the timehands 'th'
 * were published: (counter - th_offset_count) masked by th_counter_mask.
 *
 * Returns the error from __vdso_gettc() unchanged; *delta is written only
 * when that call succeeds.
 */
static int
tc_delta(const struct vdso_timehands *th, u_int *delta)
{
	u_int now;
	int rv;

	rv = __vdso_gettc(th, &now);
	if (rv != 0)
		return (rv);

	*delta = (now - th->th_offset_count) & th->th_counter_mask;
	return (0);
}
/*
 * Calculate the absolute or boot-relative time from the
 * machine-specific fast timecounter and the published timehands
 * structure read from the shared page.
 *
 * The lockless reading scheme is similar to the one used to read the
 * in-kernel timehands, see sys/kern/kern_tc.c:binuptime().  This code
 * is based on the kernel implementation.
 *
 * bt  - receives the computed time.
 * tk  - timekeep structure to read from.
 * abs - when true, th_boottime is added to the result.
 *
 * Returns 0 on success, ENOSYS when the timekeep is disabled, or the
 * (non-EAGAIN) error reported by tc_delta().  *bt may hold a partial
 * result when an error is returned.
 */
static int
freebsd_binuptime(struct bintime *bt, struct vdso_timekeep *tk, bool abs)
{
	struct vdso_timehands *th;
	uint32_t curr, gen;
	uint64_t scale, x;
	u_int delta, scale_bits;
	int error;

	do {
		if (!tk->tk_enabled)
			return (ENOSYS);

		curr = atomic_load_acq_32(&tk->tk_current);
		th = &tk->tk_th[curr];
		gen = atomic_load_acq_32(&th->th_gen);
		*bt = th->th_offset;
		error = tc_delta(th, &delta);
		/* EAGAIN: re-evaluate the loop condition and try again. */
		if (error == EAGAIN)
			continue;
		if (error != 0)
			return (error);
		scale = th->th_scale;
		/*
		 * The overflow guard below needs the position of the
		 * highest set bit of scale (fls), not the lowest (ffs):
		 * scale * delta may need up to
		 * fls(scale) + fls(delta) bits.
		 */
#ifdef _LP64
		scale_bits = flsl(scale);
#else
		scale_bits = flsll(scale);
#endif
		if (__predict_false(scale_bits + fls(delta) > 63)) {
			/*
			 * The 64-bit product could overflow: account for
			 * the upper 32 bits of scale separately and leave
			 * only the lower 32 bits for the multiply below.
			 */
			x = (scale >> 32) * delta;
			scale &= 0xffffffff;
			bt->sec += x >> 32;
			bintime_addx(bt, x << 32);
		}
		bintime_addx(bt, scale * delta);
		if (abs)
			bintime_add(bt, &th->th_boottime);

		/*
		 * Ensure that the load of th_offset is completed
		 * before the load of th_gen.
		 */
		atomic_thread_fence_acq();
	} while (curr != tk->tk_current || gen == 0 || gen != th->th_gen);
	return (0);
}
/*
 * Fetch th_offset of the current timehands into *bt without reading the
 * timecounter (no delta is added).
 *
 * Returns 0 on success or ENOSYS when the timekeep is disabled.  The read
 * is retried until tk_current and th_gen are unchanged across it and the
 * generation is non-zero.
 */
static int
freebsd_getnanouptime(struct bintime *bt, struct vdso_timekeep *tk)
{
struct vdso_timehands *th;
uint32_t curr, gen;
do {
if (!tk->tk_enabled)
return (ENOSYS);
/* Snapshot the current index and generation before copying. */
curr = atomic_load_acq_32(&tk->tk_current);
th = &tk->tk_th[curr];
gen = atomic_load_acq_32(&th->th_gen);
*bt = th->th_offset;
/*
* Ensure that the load of th_offset is completed
* before the load of th_gen.
*/
atomic_thread_fence_acq();
} while (curr != tk->tk_current || gen == 0 || gen != th->th_gen);
return (0);
}
/*
 * Compute the current wall-clock time into *tv from the shared timekeep
 * data.
 *
 * Returns ENOSYS when a timezone is requested, when no timekeep page is
 * available, or when its version is not VDSO_TK_VER_CURR; otherwise the
 * result of freebsd_binuptime().  *tv is written only on success.
 */
static int
freebsd_gettimeofday(struct timeval *tv, struct timezone *tz)
{
	struct vdso_timekeep *tk;
	struct bintime bt;
	int error;

	if (tz != NULL)
		return (ENOSYS);

	__vdso_gettimekeep(&tk);
	if (tk == NULL || tk->tk_ver != VDSO_TK_VER_CURR)
		return (ENOSYS);

	error = freebsd_binuptime(&bt, tk, true);
	if (error != 0)
		return (error);

	bintime2timeval(&bt, tv);
	return (0);
}
/*
 * Compute the time of native clock 'clock_id' into *ts from the shared
 * timekeep data.
 *
 * Returns ENOSYS when no timekeep page is available, when its version is
 * not VDSO_TK_VER_CURR, or when the clock id is not handled here;
 * otherwise the result of the underlying reader.  *ts is written only on
 * success.
 */
static int
freebsd_clock_gettime(clockid_t clock_id, struct timespec *ts)
{
	struct vdso_timekeep *tk;
	struct bintime bt;
	int error;

	__vdso_gettimekeep(&tk);
	if (tk == NULL || tk->tk_ver != VDSO_TK_VER_CURR)
		return (ENOSYS);

	switch (clock_id) {
	case CLOCK_REALTIME:
	case CLOCK_REALTIME_PRECISE:
	case CLOCK_REALTIME_FAST:
		/* Absolute time: boot time is added. */
		error = freebsd_binuptime(&bt, tk, true);
		break;
	case CLOCK_MONOTONIC:
	case CLOCK_MONOTONIC_PRECISE:
	case CLOCK_UPTIME:
	case CLOCK_UPTIME_PRECISE:
		error = freebsd_binuptime(&bt, tk, false);
		break;
	case CLOCK_MONOTONIC_FAST:
	case CLOCK_UPTIME_FAST:
		/* Fast variants skip the timecounter read. */
		error = freebsd_getnanouptime(&bt, tk);
		break;
	default:
		return (ENOSYS);
	}
	if (error != 0)
		return (error);

	bintime2timespec(&bt, ts);
	return (0);
}
/*
* Linux vDSO interfaces
*
*/
/*
 * vDSO entry point for clock_gettime.
 *
 * Tries to answer from the shared timekeep data; if the clock id cannot
 * be translated or the fast path fails for any reason, the request is
 * forwarded to __vdso_clock_gettime_fallback().  On the fast path the
 * return value is the negated result of the timespec conversion.
 */
int
__vdso_clock_gettime(clockid_t clock_id, struct l_timespec *lts)
{
	struct timespec ts;
	clockid_t which;

	if (__vdso_linux_to_native_clockid(&which, clock_id) != 0 ||
	    freebsd_clock_gettime(which, &ts) != 0)
		return (__vdso_clock_gettime_fallback(clock_id, lts));

	return (-__vdso_native_to_linux_timespec(lts, &ts));
}
/*
 * vDSO entry point for gettimeofday.
 *
 * Answers from the shared timekeep data when possible and otherwise
 * forwards the request to __vdso_gettimeofday_fallback().  On the fast
 * path the return value is the negated result of the timeval conversion.
 */
int
__vdso_gettimeofday(l_timeval *ltv, struct timezone *tz)
{
	struct timeval tv;

	if (freebsd_gettimeofday(&tv, tz) == 0)
		return (-__vdso_native_to_linux_timeval(ltv, &tv));

	return (__vdso_gettimeofday_fallback(ltv, tz));
}
/*
 * vDSO entry point for clock_getres.  There is no fast path here: the
 * request is always forwarded to __vdso_clock_getres_fallback().
 */
int
__vdso_clock_getres(clockid_t clock_id, struct l_timespec *lts)
{
return (__vdso_clock_getres_fallback(clock_id, lts));
}
#if defined(__i386__) || defined(COMPAT_LINUX32)
/*
 * vDSO entry point for clock_gettime64.
 *
 * Tries to answer from the shared timekeep data; if the clock id cannot
 * be translated or the fast path fails for any reason, the request is
 * forwarded to __vdso_clock_gettime64_fallback().  On the fast path the
 * return value is the negated result of the timespec64 conversion.
 */
int
__vdso_clock_gettime64(clockid_t clock_id, struct l_timespec64 *lts)
{
	struct timespec ts;
	clockid_t which;

	if (__vdso_linux_to_native_clockid(&which, clock_id) != 0 ||
	    freebsd_clock_gettime(which, &ts) != 0)
		return (__vdso_clock_gettime64_fallback(clock_id, lts));

	return (-__vdso_native_to_linux_timespec64(lts, &ts));
}
int clock_gettime64(clockid_t clock_id, struct l_timespec64 *lts)
__attribute__((weak, alias("__vdso_clock_gettime64")));
#endif
#if defined(__amd64__) && !defined(COMPAT_LINUX32)
/*
 * vDSO entry point for getcpu.  There is no fast path here: the request
 * is always forwarded to __vdso_getcpu_fallback().
 */
int
__vdso_getcpu(uint32_t *cpu, uint32_t *node, void *cache)
{
return (__vdso_getcpu_fallback(cpu, node, cache));
}
#endif
#if defined(__i386__) || defined(__amd64__)
/*
 * vDSO entry point for time.  There is no fast path here: the request is
 * always forwarded to __vdso_time_fallback().
 */
int
__vdso_time(long *tm)
{
return (__vdso_time_fallback(tm));
}
#endif

View File

@ -39,9 +39,6 @@
#define LINUX_DTRACE linuxulator
#define LINUX_SHAREDPAGE (VM_MAXUSER_ADDRESS - PAGE_SIZE)
#define LINUX_USRSTACK LINUX_SHAREDPAGE
/*
* Provide a separate set of types for the Linux types.
*/

View File

@ -19,7 +19,7 @@ linux_platform:
* To avoid excess stack frame the signal trampoline code emulates
* the 'call' instruction.
*/
ENTRY(linux_sigcode)
ENTRY(__kernel_sigreturn)
movl %esp, %ebx /* preserve sigframe */
call .getip0
.getip0:
@ -34,7 +34,7 @@ ENTRY(linux_sigcode)
.endsigcode:
0: jmp 0b
ENTRY(linux_rt_sigcode)
ENTRY(__kernel_rt_sigreturn)
leal LINUX_RT_SIGF_UC(%esp),%ebx /* linux ucp */
leal LINUX_RT_SIGF_SC(%ebx),%ecx /* linux sigcontext */
movl %esp, %edi
@ -50,7 +50,7 @@ ENTRY(linux_rt_sigcode)
.endrtsigcode:
0: jmp 0b
ENTRY(linux_vsyscall)
ENTRY(__kernel_vsyscall)
.startvsyscall:
int $0x80
ret

View File

@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/stddef.h>
#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
@ -63,6 +64,7 @@ __FBSDID("$FreeBSD$");
#include <machine/pcb.h>
#include <machine/trap.h>
#include <x86/linux/linux_x86.h>
#include <i386/linux/linux.h>
#include <i386/linux/linux_proto.h>
#include <compat/linux/linux_emul.h>
@ -75,13 +77,22 @@ __FBSDID("$FreeBSD$");
MODULE_VERSION(linux, 1);
/*
 * Size of the vDSO mapping: two pages.  Parenthesized so the macro
 * expands as a single operand in any expression.
 */
#define LINUX_VDSOPAGE_SIZE (PAGE_SIZE * 2)
#define LINUX_VDSOPAGE (VM_MAXUSER_ADDRESS - LINUX_VDSOPAGE_SIZE)
#define LINUX_SHAREDPAGE (LINUX_VDSOPAGE - PAGE_SIZE)
/*
* PAGE_SIZE - the size
* of the native SHAREDPAGE
*/
#define LINUX_USRSTACK LINUX_SHAREDPAGE
#define LINUX_PS_STRINGS (LINUX_USRSTACK - sizeof(struct ps_strings))
static int linux_szsigcode;
static vm_object_t linux_shared_page_obj;
static char *linux_shared_page_mapping;
extern char _binary_linux_locore_o_start;
extern char _binary_linux_locore_o_end;
static vm_object_t linux_vdso_obj;
static char *linux_vdso_mapping;
extern char _binary_linux_vdso_so_o_start;
extern char _binary_linux_vdso_so_o_end;
static vm_offset_t linux_vdso_base;
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
@ -94,6 +105,7 @@ static int linux_fixup_elf(uintptr_t *stack_base,
static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
static void linux_exec_setregs(struct thread *td,
struct image_params *imgp, uintptr_t stack);
static void linux_exec_sysvec_init(void *param);
static int linux_on_exec_vmspace(struct proc *p,
struct image_params *imgp);
static int linux_copyout_strings(struct image_params *imgp,
@ -101,6 +113,7 @@ static int linux_copyout_strings(struct image_params *imgp,
static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
static void linux_vdso_install(void *param);
static void linux_vdso_deinstall(void *param);
static void linux_vdso_reloc(char *mapping, Elf_Addr offset);
#define LINUX_T_UNKNOWN 255
static int _bsd_to_linux_trapcode[] = {
@ -142,9 +155,11 @@ static int _bsd_to_linux_trapcode[] = {
LINUX_T_UNKNOWN)
LINUX_VDSO_SYM_CHAR(linux_platform);
LINUX_VDSO_SYM_INTPTR(linux_sigcode);
LINUX_VDSO_SYM_INTPTR(linux_rt_sigcode);
LINUX_VDSO_SYM_INTPTR(linux_vsyscall);
LINUX_VDSO_SYM_INTPTR(__kernel_vsyscall);
LINUX_VDSO_SYM_INTPTR(__kernel_sigreturn);
LINUX_VDSO_SYM_INTPTR(__kernel_rt_sigreturn);
LINUX_VDSO_SYM_INTPTR(kern_timekeep_base);
LINUX_VDSO_SYM_INTPTR(kern_tsc_selector);
/*
* If FreeBSD & Linux have a difference of opinion about what a trap
@ -202,9 +217,8 @@ linux_copyout_auxargs(struct image_params *imgp, uintptr_t base)
argarray = pos = malloc(LINUX_AT_COUNT * sizeof(*pos), M_TEMP,
M_WAITOK | M_ZERO);
AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR,
imgp->proc->p_sysent->sv_shared_page_base);
AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, linux_vsyscall);
AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, linux_vdso_base);
AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, __kernel_vsyscall);
AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
/*
@ -468,7 +482,7 @@ linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
/* Build context to run handler in. */
regs->tf_esp = (int)fp;
regs->tf_eip = linux_rt_sigcode;
regs->tf_eip = __kernel_rt_sigreturn;
regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
regs->tf_cs = _ucodesel;
regs->tf_ds = _udatasel;
@ -570,7 +584,7 @@ linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
/* Build context to run handler in. */
regs->tf_esp = (int)fp;
regs->tf_eip = linux_sigcode;
regs->tf_eip = __kernel_sigreturn;
regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
regs->tf_cs = _ucodesel;
regs->tf_ds = _udatasel;
@ -817,7 +831,7 @@ struct sysentvec linux_sysvec = {
.sv_transtrap = linux_translate_traps,
.sv_fixup = linux_fixup,
.sv_sendsig = linux_sendsig,
.sv_sigcode = &_binary_linux_locore_o_start,
.sv_sigcode = &_binary_linux_vdso_so_o_start,
.sv_szsigcode = &linux_szsigcode,
.sv_name = "Linux a.out",
.sv_coredump = NULL,
@ -853,7 +867,7 @@ struct sysentvec elf_linux_sysvec = {
.sv_transtrap = linux_translate_traps,
.sv_fixup = linux_fixup_elf,
.sv_sendsig = linux_sendsig,
.sv_sigcode = &_binary_linux_locore_o_start,
.sv_sigcode = &_binary_linux_vdso_so_o_start,
.sv_szsigcode = &linux_szsigcode,
.sv_name = "Linux ELF32",
.sv_coredump = elf32_coredump,
@ -873,7 +887,7 @@ struct sysentvec elf_linux_sysvec = {
.sv_fixlimit = NULL,
.sv_maxssiz = NULL,
.sv_flags = SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP |
SV_SIG_DISCIGN | SV_SIG_WAITNDQ,
SV_SIG_DISCIGN | SV_SIG_WAITNDQ | SV_TIMEKEEP,
.sv_set_syscall_retval = linux_set_syscall_retval,
.sv_fetch_syscall_args = linux_fetch_syscall_args,
.sv_syscallnames = NULL,
@ -891,45 +905,128 @@ struct sysentvec elf_linux_sysvec = {
static int
linux_on_exec_vmspace(struct proc *p, struct image_params *imgp)
{
int error = 0;
linux_on_exec(p, imgp);
return (0);
if (SV_PROC_FLAG(p, SV_SHP) != 0)
error = linux_map_vdso(p, linux_vdso_obj,
linux_vdso_base, LINUX_VDSOPAGE_SIZE, imgp);
if (error == 0)
linux_on_exec(p, imgp);
return (error);
}
static void
linux_exec_sysvec_init(void *param)
{
l_uintptr_t *ktimekeep_base, *ktsc_selector;
struct sysentvec *sv;
ptrdiff_t tkoff;
sv = param;
/* Fill timekeep_base */
exec_sysvec_init(sv);
tkoff = kern_timekeep_base - linux_vdso_base;
ktimekeep_base = (l_uintptr_t *)(linux_vdso_mapping + tkoff);
*ktimekeep_base = sv->sv_timekeep_base;
tkoff = kern_tsc_selector - linux_vdso_base;
ktsc_selector = (l_uintptr_t *)(linux_vdso_mapping + tkoff);
*ktsc_selector = linux_vdso_tsc_selector_idx();
if (bootverbose)
printf("Linux i386 vDSO tsc_selector: %u\n", *ktsc_selector);
}
SYSINIT(elf_linux_exec_sysvec_init, SI_SUB_EXEC, SI_ORDER_ANY,
linux_exec_sysvec_init, &elf_linux_sysvec);
static void
linux_vdso_install(void *param)
{
char *vdso_start = &_binary_linux_vdso_so_o_start;
char *vdso_end = &_binary_linux_vdso_so_o_end;
linux_szsigcode = (&_binary_linux_locore_o_end -
&_binary_linux_locore_o_start);
linux_szsigcode = vdso_end - vdso_start;
MPASS(linux_szsigcode <= LINUX_VDSOPAGE_SIZE);
if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
panic("Linux invalid vdso size\n");
linux_vdso_base = LINUX_VDSOPAGE;
__elfN(linux_vdso_fixup)(&elf_linux_sysvec);
__elfN(linux_vdso_fixup)(vdso_start, linux_vdso_base);
linux_shared_page_obj = __elfN(linux_shared_page_init)
(&linux_shared_page_mapping);
linux_vdso_obj = __elfN(linux_shared_page_init)
(&linux_vdso_mapping, LINUX_VDSOPAGE_SIZE);
bcopy(vdso_start, linux_vdso_mapping, linux_szsigcode);
__elfN(linux_vdso_reloc)(&elf_linux_sysvec);
bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping,
linux_szsigcode);
elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;
linux_vdso_reloc(linux_vdso_mapping, linux_vdso_base);
}
SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_FIRST,
linux_vdso_install, NULL);
static void
linux_vdso_deinstall(void *param)
{
__elfN(linux_shared_page_fini)(linux_shared_page_obj,
linux_shared_page_mapping);
__elfN(linux_shared_page_fini)(linux_vdso_obj,
linux_vdso_mapping, LINUX_VDSOPAGE_SIZE);
}
SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
linux_vdso_deinstall, NULL);
/*
 * Apply the REL relocations found in the vDSO image.
 *
 * mapping - kernel-side copy of the vDSO ELF image; patched in place.
 * offset  - base address added to each relative relocation; must be
 *           non-zero.
 *
 * Only R_386_NONE and R_386_RELATIVE are handled.  A RELA section, an
 * ifunc relocation or any other relocation type is reported with printf()
 * and skipped.
 */
static void
linux_vdso_reloc(char *mapping, Elf_Addr offset)
{
	const Elf_Ehdr *hdr;
	const Elf_Shdr *sect;
	const Elf_Rel *rel;
	Elf_Addr *where;
	Elf_Addr addend, addr;
	Elf_Size rtype, symidx;
	int n, relcnt;

	MPASS(offset != 0);

	relcnt = 0;
	hdr = (const Elf_Ehdr *)mapping;
	sect = (const Elf_Shdr *)(mapping + hdr->e_shoff);

	/* Locate the relocation table; the last REL section seen wins. */
	for (n = 0; n < hdr->e_shnum; n++) {
		if (sect[n].sh_type == SHT_REL) {
			rel = (const Elf_Rel *)(mapping + sect[n].sh_offset);
			relcnt = sect[n].sh_size / sizeof(*rel);
		} else if (sect[n].sh_type == SHT_RELA) {
			printf("Linux i386 vDSO: unexpected Rela section\n");
		}
	}

	for (n = 0; n < relcnt; n++, rel++) {
		/* REL entries keep the addend in the word being patched. */
		where = (Elf_Addr *)(mapping + rel->r_offset);
		addend = *where;
		rtype = ELF_R_TYPE(rel->r_info);
		symidx = ELF_R_SYM(rel->r_info);

		switch (rtype) {
		case R_386_NONE:	/* none */
			break;

		case R_386_RELATIVE:	/* B + A */
			addr = (Elf_Addr)PTROUT(offset + addend);
			/* Skip the store when the word is already right. */
			if (*where != addr)
				*where = addr;
			break;

		case R_386_IRELATIVE:
			printf("Linux i386 vDSO: unexpected ifunc relocation, "
			    "symbol index %d\n", symidx);
			break;

		default:
			printf("Linux i386 vDSO: unexpected relocation type %d, "
			    "symbol index %d\n", rtype, symidx);
			break;
		}
	}
}
static char GNU_ABI_VENDOR[] = "GNU";
static int GNULINUX_ABI_DESC = 0;

View File

@ -51,15 +51,30 @@ PHDRS
eh_frame_hdr PT_GNU_EH_FRAME;
}
ENTRY(linux_vsyscall);
VERSION
{
LINUX_2.6 {
global:
__vdso_clock_gettime;
__vdso_gettimeofday;
__vdso_time;
__vdso_clock_getres;
__vdso_clock_gettime64;
};
LINUX_2.5 {
global:
linux_vsyscall;
linux_sigcode;
linux_rt_sigcode;
__kernel_vsyscall;
__kernel_sigreturn;
__kernel_rt_sigreturn;
local: *;
};
LINUX_0.0 {
global:
linux_platform;
kern_timekeep_base;
kern_tsc_selector;
local: *;
};
}

View File

@ -0,0 +1,145 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2021 Dmitry Chagin <dchagin@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/elf.h>
#include <sys/errno.h>
#include <sys/proc.h>
#include <sys/stddef.h>
#define _KERNEL
#include <sys/vdso.h>
#undef _KERNEL
#include <stdbool.h>
#include <strings.h>
#include <machine/atomic.h>
#include <machine/stdarg.h>
#include <i386/linux/linux.h>
#include <i386/linux/linux_syscall.h>
#include <compat/linux/linux_errno.h>
#include <compat/linux/linux_timer.h>
/*
 * The kernel fixes these up when the vDSO is installed.
 *
 * kern_tsc_selector is the index used to pick an entry of tsc_selector[]
 * in linux_vdso_gettc_x86.inc.
 * NOTE(review): kern_timekeep_base presumably points at the shared
 * timekeeping data; its consumer is not visible here -- confirm.
 */
uintptr_t *kern_timekeep_base = NULL;
uint32_t kern_tsc_selector = 0;
#include <x86/linux/linux_vdso_gettc_x86.inc>
/*
 * Raw Linux write system call through the int $0x80 gate.
 *
 * The syscall number goes in %eax and fd, buf and size in %ebx, %ecx and
 * %edx.  Whatever the kernel leaves in %eax is returned unmodified.
 */
static int
write(int fd, const void *buf, size_t size)
{
	int ret;

	__asm__ __volatile__("int $0x80"
	    : "=a" (ret)
	    : "a" (LINUX_SYS_write), "b" (fd), "c" (buf), "d" (size)
	    : "cc", "memory");
	return (ret);
}
/*
 * Kernel fallback for clock_gettime: issues the
 * LINUX_SYS_linux_clock_gettime system call via int $0x80 with clock_id
 * in %ebx and ts in %ecx, and returns the kernel's %eax value as is.
 */
static int
__vdso_clock_gettime_fallback(clockid_t clock_id, struct l_timespec *ts)
{
	int ret;

	__asm__ __volatile__("int $0x80"
	    : "=a" (ret)
	    : "a" (LINUX_SYS_linux_clock_gettime), "b" (clock_id), "c" (ts)
	    : "cc", "memory");
	return (ret);
}
/*
 * Kernel fallback for the 64-bit-time clock_gettime: issues the
 * LINUX_SYS_linux_clock_gettime64 system call via int $0x80 with clock_id
 * in %ebx and ts in %ecx, and returns the kernel's %eax value as is.
 */
static int
__vdso_clock_gettime64_fallback(clockid_t clock_id, struct l_timespec64 *ts)
{
	int ret;

	__asm__ __volatile__("int $0x80"
	    : "=a" (ret)
	    : "a" (LINUX_SYS_linux_clock_gettime64), "b" (clock_id), "c" (ts)
	    : "cc", "memory");
	return (ret);
}
/*
 * Kernel fallback for gettimeofday: issues the LINUX_SYS_gettimeofday
 * system call via int $0x80 with tv in %ebx and tz in %ecx, and returns
 * the kernel's %eax value as is.
 */
static int
__vdso_gettimeofday_fallback(l_timeval *tv, struct timezone *tz)
{
	int ret;

	__asm__ __volatile__("int $0x80"
	    : "=a" (ret)
	    : "a" (LINUX_SYS_gettimeofday), "b" (tv), "c" (tz)
	    : "cc", "memory");
	return (ret);
}
/*
 * Kernel fallback for clock_getres: issues the
 * LINUX_SYS_linux_clock_getres system call via int $0x80 with clock_id in
 * %ebx and ts in %ecx, and returns the kernel's %eax value as is.
 */
static int
__vdso_clock_getres_fallback(clockid_t clock_id, struct l_timespec *ts)
{
	int ret;

	__asm__ __volatile__("int $0x80"
	    : "=a" (ret)
	    : "a" (LINUX_SYS_linux_clock_getres), "b" (clock_id), "c" (ts)
	    : "cc", "memory");
	return (ret);
}
/*
 * Kernel fallback for time: issues the LINUX_SYS_linux_time system call
 * via int $0x80 with tm in %ebx, and returns the kernel's %eax value as
 * is.
 */
static int
__vdso_time_fallback(long *tm)
{
	int ret;

	__asm__ __volatile__("int $0x80"
	    : "=a" (ret)
	    : "a" (LINUX_SYS_linux_time), "b" (tm)
	    : "cc", "memory");
	return (ret);
}
#include <compat/linux/linux_vdso_gtod.inc>

View File

@ -10,8 +10,6 @@ CFLAGS+=-DCOMPAT_FREEBSD32 -DCOMPAT_LINUX32
.PATH: ${SRCTOP}/sys/x86/linux
.endif
VDSO= linux${SFX}_vdso
KMOD= linux
SRCS= linux_fork.c linux${SFX}_dummy_machdep.c linux_file.c linux_event.c \
linux_futex.c linux_getcwd.c linux_ioctl.c linux_ipc.c \
@ -22,7 +20,8 @@ SRCS= linux_fork.c linux${SFX}_dummy_machdep.c linux_file.c linux_event.c \
opt_inet6.h opt_compat.h opt_posix.h opt_usb.h vnode_if.h \
device_if.h bus_if.h
.if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64"
SRCS+= linux_dummy_x86.c
SRCS+= linux_dummy_x86.c linux_vdso_tsc_selector_x86.c
VDSODEPS=linux_vdso_gettc_x86.inc
.endif
.if ${MACHINE_CPUARCH} == "amd64"
SRCS+= linux${SFX}_support.s
@ -38,7 +37,7 @@ SRCS+= opt_kstack_pages.h opt_nfs.h opt_hwpmc_hooks.h
SRCS+= opt_apic.h
.endif
OBJS= ${VDSO}.so
OBJS= linux${SFX}_vdso.so
.if ${MACHINE_CPUARCH} == "i386"
SRCS+= linux_ptrace.c imgact_linux.c linux_util.c linux_mib.c linux_mmap.c \
@ -55,33 +54,54 @@ EXPORT_SYMS+= linux_ioctl_unregister_handler
.endif
CLEANFILES= linux${SFX}_assym.h linux${SFX}_genassym.o linux${SFX}_locore.o \
genassym.o
genassym.o linux${SFX}_vdso_gtod.o linux${SFX}_vdso.so.o
linux${SFX}_assym.h: linux${SFX}_genassym.o
sh ${SYSDIR}/kern/genassym.sh linux${SFX}_genassym.o > ${.TARGET}
.if ${MACHINE_CPUARCH} == "amd64"
VDSOFLAGS=-DCOMPAT_FREEBSD32 -DCOMPAT_LINUX32 -m32
.endif
# Build the vDSO entry code (locore) object from its assembler source.
# NOTE(review): this recipe holds two ${CC} invocations -- a "-shared"
# compile-and-link that references ${VDSO}, and a compile-only "-c" one --
# and the backslash after -Wl,-soname=... continues the first straight into
# the second.  The first five recipe lines look like remnants of an earlier
# version of the rule; confirm and drop them.
linux${SFX}_locore.o: linux${SFX}_assym.h assym.inc
${CC} ${CCLDFLAGS} -x assembler-with-cpp -DLOCORE -m32 -shared -s \
-pipe -I. -I${SYSDIR} ${WERROR} -Wall -fno-common -nostdinc -nostdlib \
-fno-omit-frame-pointer -fPIC \
-Wl,-T${SRCTOP}/sys/${MACHINE_CPUARCH}/linux${SFX}/${VDSO}.lds.s \
-Wl,-soname=${VDSO}.so.1,--eh-frame-hdr,-warn-common \
${CC} -c -x assembler-with-cpp -DLOCORE -fPIC -pipe -O2 -Werror \
-msoft-float -mregparm=0 \
-mcmodel=small -fno-common -nostdinc -fasynchronous-unwind-tables \
-fno-omit-frame-pointer -foptimize-sibling-calls ${VDSOFLAGS} \
-fno-stack-protector -I. -I${SYSDIR} -I${SRCTOP}/include \
${.IMPSRC} -o ${.TARGET}
# Compile the C part of the vDSO as a position-independent object:
# soft-float, no stack protector, with unwind tables.  ${VDSOFLAGS}
# carries the extra switches set for amd64 builds; the target is rebuilt
# when linux_vdso_gtod.inc or ${VDSODEPS} change.
linux${SFX}_vdso_gtod.o: linux_vdso_gtod.inc ${VDSODEPS}
${CC} -c -fPIC -pipe -O2 -Werror -msoft-float -mregparm=0 \
-mcmodel=small -fno-common -nostdinc -fasynchronous-unwind-tables \
-fno-omit-frame-pointer -foptimize-sibling-calls ${VDSOFLAGS} \
-fno-stack-protector -I. -I${SYSDIR} -I${SRCTOP}/include \
${.IMPSRC} -o ${.TARGET}
# Link the locore and gtod objects into the 32-bit vDSO shared object
# (soname linux-gate.so.1) using the vDSO linker script.  --no-undefined
# turns any unresolved reference into a link error; debug and symbol
# information is stripped.
linux${SFX}_vdso.so.o: linux${SFX}_locore.o linux${SFX}_vdso_gtod.o
${LD} -m elf_i386 --shared --eh-frame-hdr -soname=linux-gate.so.1 \
--no-undefined --hash-style=both -warn-common -nostdlib \
--strip-debug -s --build-id=sha1 --Bsymbolic \
-T${SRCTOP}/sys/${MACHINE}/linux${SFX}/linux${SFX}_vdso.lds.s \
-o ${.TARGET} ${.ALLSRC:M*.o}
# Object format objcopy emits when wrapping the vDSO image; it follows the
# architecture the module is built for.  Other architectures are rejected.
.if ${MACHINE_CPUARCH} == "amd64"
OBJCOPY_TARGET=--output-target elf64-x86-64-freebsd --binary-architecture i386
.elif ${MACHINE_CPUARCH} == "i386"
OBJCOPY_TARGET=--output-target elf32-i386-freebsd --binary-architecture i386
.else
.error ${MACHINE_CPUARCH} not yet supported by linux
.endif
# Wrap the linked vDSO as raw binary data inside an object file, then
# remove the generated _binary_..._size symbol from it.
linux${SFX}_vdso.so: linux${SFX}_vdso.so.o
${OBJCOPY} --input-target binary ${OBJCOPY_TARGET} \
linux${SFX}_vdso.so.o ${.TARGET}
${STRIPBIN} -N _binary_linux${SFX}_vdso_so_o_size ${.TARGET}
.if ${MACHINE_CPUARCH} == "amd64"
linux${SFX}_support.o: linux${SFX}_assym.h assym.inc
${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \
${.IMPSRC} -o ${.TARGET}
${VDSO}.so: linux${SFX}_locore.o
${OBJCOPY} --input-target binary --output-target elf64-x86-64-freebsd \
--binary-architecture i386 linux${SFX}_locore.o ${.TARGET}
${STRIPBIN} -N _binary_linux${SFX}_locore_o_size ${.TARGET}
.else
${VDSO}.so: linux${SFX}_locore.o
${OBJCOPY} --input-target binary --output-target elf32-i386-freebsd \
--binary-architecture i386 linux${SFX}_locore.o ${.TARGET}
${STRIPBIN} -N _binary_linux_locore_o_size ${.TARGET}
.endif
linux${SFX}_genassym.o: offset.inc

View File

@ -5,8 +5,6 @@
.PATH: ${SRCTOP}/sys/x86/linux
.endif
VDSO= linux_vdso
KMOD= linux64
SRCS= linux_elf64.c linux_fork.c linux_dummy_machdep.c linux_file.c \
linux_event.c linux_futex.c linux_getcwd.c linux_ioctl.c linux_ipc.c \
@ -17,7 +15,7 @@ SRCS= linux_elf64.c linux_fork.c linux_dummy_machdep.c linux_file.c \
vnode_if.h device_if.h bus_if.h \
linux_support.s
.if ${MACHINE_CPUARCH} == "amd64"
SRCS+= linux_dummy_x86.c
SRCS+= linux_dummy_x86.c linux_vdso_tsc_selector_x86.c
.endif
DPSRCS= assym.inc linux_genassym.c
@ -25,20 +23,44 @@ DPSRCS= assym.inc linux_genassym.c
SRCS+= opt_kstack_pages.h opt_nfs.h opt_hwpmc_hooks.h
CLEANFILES= linux_assym.h linux_genassym.o linux_locore.o \
genassym.o
genassym.o linux_vdso_gtod.o linux_vdso.so.o
OBJS= ${VDSO}.so
OBJS= linux_vdso.so
linux_assym.h: linux_genassym.o
sh ${SYSDIR}/kern/genassym.sh linux_genassym.o > ${.TARGET}
linux_locore.o: linux_locore.asm linux_assym.h
${CC} ${CCLDFLAGS} -x assembler-with-cpp -DLOCORE -shared -mcmodel=small \
-pipe -I. -I${SYSDIR} ${WERROR} -Wall -fno-common -fPIC -nostdinc \
-Wl,-T${SRCTOP}/sys/${MACHINE}/linux/${VDSO}.lds.s \
-Wl,-soname=${VDSO}.so.1,-warn-common -nostdlib \
# Per-architecture compiler flags (and extra dependencies) for the vDSO
# objects.
.if ${MACHINE_CPUARCH} == "amd64"
VDSOFLAGS=-mregparm=0 -mcmodel=small -msoft-float
VDSODEPS=linux_vdso_gettc_x86.inc
.elif ${MACHINE_CPUARCH} == "aarch64"
# Linux uses the tiny memory model, but our ld does not know about
# some of the relocation types that cc generates for it.
VDSOFLAGS=-mgeneral-regs-only -mcmodel=small -ffixed-x18
.endif
# Compile-only build of the vDSO entry code (locore) through the C
# preprocessor, as position-independent code with unwind tables and the
# per-architecture ${VDSOFLAGS}.
linux_locore.o: linux_assym.h assym.inc
${CC} -c -x assembler-with-cpp -DLOCORE \
-fPIC -pipe -O2 -Werror ${VDSOFLAGS} \
-nostdinc -fasynchronous-unwind-tables \
-fno-omit-frame-pointer -foptimize-sibling-calls \
-fno-stack-protector -I. -I${SYSDIR} -I${SRCTOP}/include \
${.IMPSRC} -o ${.TARGET}
# Compile the C part of the vDSO with the same flag set as locore; the
# target is rebuilt when linux_vdso_gtod.inc or ${VDSODEPS} change.
linux_vdso_gtod.o: linux_vdso_gtod.inc ${VDSODEPS}
${CC} -c -fPIC -pipe -O2 -Werror ${VDSOFLAGS} \
-nostdinc -fasynchronous-unwind-tables \
-fno-omit-frame-pointer -foptimize-sibling-calls \
-fno-stack-protector -I. -I${SYSDIR} -I${SRCTOP}/include \
${.IMPSRC} -o ${.TARGET}
# Link the locore and gtod objects into the vDSO shared object (soname
# linux-vdso.so.1) using the vDSO linker script.  --no-undefined turns any
# unresolved reference into a link error; debug and symbol information is
# stripped.
linux_vdso.so.o: linux_locore.o linux_vdso_gtod.o
${LD} --shared --eh-frame-hdr -soname=linux-vdso.so.1 \
--no-undefined --hash-style=both -warn-common -nostdlib \
--strip-debug -s --build-id=sha1 -Bsymbolic \
-T${SRCTOP}/sys/${MACHINE}/linux/linux_vdso.lds.s \
-o ${.TARGET} ${.ALLSRC:M*.o}
.if ${MACHINE_CPUARCH} == "aarch64"
OBJCOPY_TARGET=--output-target elf64-littleaarch64 --binary-architecture aarch64
.elif ${MACHINE_CPUARCH} == "amd64"
@ -46,10 +68,11 @@ OBJCOPY_TARGET=--output-target elf64-x86-64 --binary-architecture i386:x86-64
.else
.error ${MACHINE_CPUARCH} not yet supported by linux64
.endif
${VDSO}.so: linux_locore.o
linux_vdso.so: linux_vdso.so.o
${OBJCOPY} --input-target binary ${OBJCOPY_TARGET} \
linux_locore.o ${.TARGET}
${STRIPBIN} -N _binary_linux_locore_o_size ${.TARGET}
linux_vdso.so.o ${.TARGET}
${STRIPBIN} -N _binary_linux_vdso_so_o_size ${.TARGET}
linux_support.o: assym.inc linux_assym.h
${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \

View File

@ -0,0 +1,164 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2012 Konstantin Belousov <kib@FreeBSD.org>
* Copyright (c) 2016, 2017, 2019 The FreeBSD Foundation
* Copyright (c) 2021 Dmitry Chagin <dchagin@FreeBSD.org>
*
* Portions of this software were developed by Konstantin Belousov
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#if defined(__i386__) || defined(COMPAT_LINUX32)
#include <i386/include/atomic.h>
#include <i386/include/cpufunc.h>
#else
#include <amd64/include/atomic.h>
#include <amd64/include/cpufunc.h>
#endif
/*
 * Read the TSC and return the low 32 bits of (TSC >> th->th_x86_shift).
 *
 * RDTSC leaves the counter in %edx:%eax; SHRD then shifts %eax right by
 * %cl bits, filling the vacated high bits from %edx.  The "c" input
 * constraint loads the shift count into %ecx, and %edx is listed as
 * clobbered because RDTSC overwrites it.
 */
static inline u_int
rdtsc_low(const struct vdso_timehands *th)
{
u_int rv;
__asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
: "=a" (rv) : "c" (th->th_x86_shift) : "edx");
return (rv);
}
/*
 * RDTSCP flavour of rdtsc_low(): returns the low 32 bits of
 * (TSC >> th->th_x86_shift).
 *
 * RDTSCP overwrites %ecx in addition to %edx:%eax, so the shift count is
 * passed in %edi ("D" constraint) and copied into %ecx only after the
 * counter has been read; %ecx and %edx are both listed as clobbered.
 */
static inline u_int
rdtscp_low(const struct vdso_timehands *th)
{
u_int rv;
__asm __volatile("rdtscp; movl %%edi,%%ecx; shrd %%cl, %%edx, %0"
: "=a" (rv) : "D" (th->th_x86_shift) : "ecx", "edx");
return (rv);
}
static u_int
rdtsc_low_mb_lfence(const struct vdso_timehands *th)
{
lfence();
return (rdtsc_low(th));
}
static u_int
rdtsc_low_mb_mfence(const struct vdso_timehands *th)
{
mfence();
return (rdtsc_low(th));
}
static u_int
rdtsc_low_mb_none(const struct vdso_timehands *th)
{
return (rdtsc_low(th));
}
static u_int
rdtsc32_mb_lfence(void)
{
lfence();
return (rdtsc32());
}
static u_int
rdtsc32_mb_mfence(void)
{
mfence();
return (rdtsc32());
}
static u_int
rdtsc32_mb_none(void)
{
return (rdtsc32());
}
static u_int
rdtscp32_(void)
{
return (rdtscp32());
}
/*
 * One pair of TSC readers: ts_rdtsc32 returns an unshifted 32-bit read,
 * ts_rdtsc_low a read shifted by the timehands' th_x86_shift.
 */
struct tsc_selector_tag {
	u_int (*ts_rdtsc32)(void);
	u_int (*ts_rdtsc_low)(const struct vdso_timehands *);
};

/*
 * Reader pairs, indexed by kern_tsc_selector.  Initialisers are
 * positional: { ts_rdtsc32, ts_rdtsc_low }.
 */
static const struct tsc_selector_tag tsc_selector[] = {
	/* Intel, LFENCE */
	[0] = { rdtsc32_mb_lfence, rdtsc_low_mb_lfence },
	/* AMD, MFENCE */
	[1] = { rdtsc32_mb_mfence, rdtsc_low_mb_mfence },
	/* No SSE2 */
	[2] = { rdtsc32_mb_none, rdtsc_low_mb_none },
	/* RDTSCP */
	[3] = { rdtscp32_, rdtscp_low },
};
static u_int
__vdso_gettc_rdtsc_low(const struct vdso_timehands *th)
{
return (tsc_selector[kern_tsc_selector].ts_rdtsc_low(th));
}
static u_int
__vdso_gettc_rdtsc32(void)
{
return (tsc_selector[kern_tsc_selector].ts_rdtsc32());
}
/*
 * Sample the timecounter described by 'th' and store the reading in *tc.
 *
 * Returns 0 on success.  Only the TSC algorithm is implemented; HPET
 * (TODO) and every other algorithm yield ENOSYS and leave *tc untouched.
 */
int
__vdso_gettc(const struct vdso_timehands *th, u_int *tc)
{

	if (th->th_algo != VDSO_TH_ALGO_X86_TSC)
		return (ENOSYS);

	/* A non-zero shift selects the shifted reader. */
	if (th->th_x86_shift > 0)
		*tc = __vdso_gettc_rdtsc_low(th);
	else
		*tc = __vdso_gettc_rdtsc32();
	return (0);
}

View File

@ -0,0 +1,57 @@
/*-
* Copyright (c) 2012 Konstantin Belousov <kib@FreeBSD.org>
* Copyright (c) 2016, 2017, 2019 The FreeBSD Foundation
* Copyright (c) 2021 Dmitry Chagin <dchagin@FreeBSD.org>
*
* Portions of this software were developed by Konstantin Belousov
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <x86/cputypes.h>
#include <x86/x86_var.h>
#include <x86/specialreg.h>
#include <x86/linux/linux_x86.h>
/*
 * Choose which TSC reader the Linux vDSO should use on this CPU.  The
 * value is an index into the vDSO's tsc_selector[] table.
 *
 * Returns:
 *	3 - AMDID_RDTSCP is set in amd_feature: use RDTSCP
 *	2 - no CPUID feature bits at all, or no SSE2: read without a fence
 *	1 - AMD or Hygon CPU with SSE2: MFENCE before RDTSC
 *	0 - any other vendor with SSE2: LFENCE before RDTSC
 *
 * Defined with a (void) parameter list so that the definition is a real
 * prototype and agrees with the declaration in linux_x86.h.
 */
int
linux_vdso_tsc_selector_idx(void)
{
	bool amd_cpu;

	if (cpu_feature == 0)
		return (2);	/* should not happen due to RDTSC */

	amd_cpu = (cpu_vendor_id == CPU_VENDOR_AMD ||
	    cpu_vendor_id == CPU_VENDOR_HYGON);

	/* RDTSCP takes precedence over any fence-based variant. */
	if ((amd_feature & AMDID_RDTSCP) != 0)
		return (3);
	if ((cpu_feature & CPUID_SSE2) == 0)
		return (2);
	return (amd_cpu ? 1 : 0);
}

33
sys/x86/linux/linux_x86.h Normal file
View File

@ -0,0 +1,33 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2021 Dmitry Chagin <dchagin@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef _X86_INCLUDE_LINUX_LINUX_X86_H_
#define _X86_INCLUDE_LINUX_LINUX_X86_H_
/*
 * Returns the index (0..3) of the TSC reader the Linux vDSO should use on
 * the running CPU; implemented in linux_vdso_tsc_selector_x86.c.
 */
int linux_vdso_tsc_selector_idx(void);
#endif /* _X86_INCLUDE_LINUX_LINUX_X86_H_ */