pvclock: Add vDSO support
Add vDSO support for timekeeping devices that support the KVM/XEN paravirtual clock API. Also, expose, in the userspace-accessible '<machine/pvclock.h>', definitions that will be needed by 'libc' to support 'VDSO_TH_ALGO_X86_PVCLK'. Sponsored by: Juniper Networks, Inc. Sponsored by: Klara, Inc. Reviewed by: kib Differential Revision: https://reviews.freebsd.org/D31418
This commit is contained in:
parent
6c69c6bb4c
commit
d4b2d3035a
@ -156,6 +156,8 @@ hpet_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc)
|
||||
vdso_th->th_algo = VDSO_TH_ALGO_X86_HPET;
|
||||
vdso_th->th_x86_shift = 0;
|
||||
vdso_th->th_x86_hpet_idx = device_get_unit(sc->dev);
|
||||
vdso_th->th_x86_pvc_last_systime = 0;
|
||||
vdso_th->th_x86_pvc_stable_mask = 0;
|
||||
bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
|
||||
return (sc->mmap_allow != 0);
|
||||
}
|
||||
@ -171,6 +173,8 @@ hpet_vdso_timehands32(struct vdso_timehands32 *vdso_th32,
|
||||
vdso_th32->th_algo = VDSO_TH_ALGO_X86_HPET;
|
||||
vdso_th32->th_x86_shift = 0;
|
||||
vdso_th32->th_x86_hpet_idx = device_get_unit(sc->dev);
|
||||
vdso_th32->th_x86_pvc_last_systime = 0;
|
||||
vdso_th32->th_x86_pvc_stable_mask = 0;
|
||||
bzero(vdso_th32->th_res, sizeof(vdso_th32->th_res));
|
||||
return (sc->mmap_allow != 0);
|
||||
}
|
||||
|
@ -128,6 +128,8 @@ hyperv_tsc_vdso_timehands(struct vdso_timehands *vdso_th,
|
||||
vdso_th->th_algo = VDSO_TH_ALGO_X86_HVTSC;
|
||||
vdso_th->th_x86_shift = 0;
|
||||
vdso_th->th_x86_hpet_idx = 0;
|
||||
vdso_th->th_x86_pvc_last_systime = 0;
|
||||
vdso_th->th_x86_pvc_stable_mask = 0;
|
||||
bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
|
||||
return (1);
|
||||
}
|
||||
|
@ -30,7 +30,12 @@
|
||||
#define X86_PVCLOCK
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/timetc.h>
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#define PVCLOCK_CDEVNAME "pvclock"
|
||||
|
||||
struct pvclock_vcpu_time_info {
|
||||
uint32_t version;
|
||||
@ -46,6 +51,59 @@ struct pvclock_vcpu_time_info {
|
||||
#define PVCLOCK_FLAG_TSC_STABLE 0x01
|
||||
#define PVCLOCK_FLAG_GUEST_PASUED 0x02
|
||||
|
||||
/*
|
||||
* Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
|
||||
* yielding a 64-bit result.
|
||||
*/
|
||||
static inline uint64_t
|
||||
pvclock_scale_delta(uint64_t delta, uint32_t mul_frac, int shift)
|
||||
{
|
||||
uint64_t product;
|
||||
|
||||
if (shift < 0)
|
||||
delta >>= -shift;
|
||||
else
|
||||
delta <<= shift;
|
||||
#if defined(__i386__)
|
||||
{
|
||||
uint32_t tmp1, tmp2;
|
||||
|
||||
/**
|
||||
* For i386, the formula looks like:
|
||||
*
|
||||
* lower = (mul_frac * (delta & UINT_MAX)) >> 32
|
||||
* upper = mul_frac * (delta >> 32)
|
||||
* product = lower + upper
|
||||
*/
|
||||
__asm__ (
|
||||
"mul %5 ; "
|
||||
"mov %4,%%eax ; "
|
||||
"mov %%edx,%4 ; "
|
||||
"mul %5 ; "
|
||||
"xor %5,%5 ; "
|
||||
"add %4,%%eax ; "
|
||||
"adc %5,%%edx ; "
|
||||
: "=A" (product), "=r" (tmp1), "=r" (tmp2)
|
||||
: "a" ((uint32_t)delta), "1" ((uint32_t)(delta >> 32)),
|
||||
"2" (mul_frac) );
|
||||
}
|
||||
#elif defined(__amd64__)
|
||||
{
|
||||
unsigned long tmp;
|
||||
|
||||
__asm__ (
|
||||
"mulq %[mul_frac] ; shrd $32, %[hi], %[lo]"
|
||||
: [lo]"=a" (product), [hi]"=d" (tmp)
|
||||
: "0" (delta), [mul_frac]"rm"((uint64_t)mul_frac));
|
||||
}
|
||||
#else
|
||||
#error "pvclock: unsupported x86 architecture?"
|
||||
#endif
|
||||
return (product);
|
||||
}
|
||||
|
||||
#ifdef _KERNEL
|
||||
|
||||
typedef struct pvclock_wall_clock *pvclock_get_wallclock_t(void *arg);
|
||||
|
||||
struct pvclock_wall_clock {
|
||||
@ -62,7 +120,9 @@ struct pvclock {
|
||||
bool stable_flag_supported;
|
||||
|
||||
/* Private; initialized by the 'pvclock' API: */
|
||||
bool vdso_force_unstable;
|
||||
struct timecounter tc;
|
||||
struct cdev *cdev;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -81,4 +141,6 @@ void pvclock_init(struct pvclock *pvc, device_t dev,
|
||||
void pvclock_gettime(struct pvclock *pvc, struct timespec *ts);
|
||||
int pvclock_destroy(struct pvclock *pvc);
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#endif
|
||||
|
@ -37,11 +37,14 @@
|
||||
#define VDSO_TIMEHANDS_MD \
|
||||
uint32_t th_x86_shift; \
|
||||
uint32_t th_x86_hpet_idx; \
|
||||
uint32_t th_res[6];
|
||||
uint64_t th_x86_pvc_last_systime;\
|
||||
uint8_t th_x86_pvc_stable_mask; \
|
||||
uint8_t th_res[15];
|
||||
|
||||
#define VDSO_TH_ALGO_X86_TSC VDSO_TH_ALGO_1
|
||||
#define VDSO_TH_ALGO_X86_HPET VDSO_TH_ALGO_2
|
||||
#define VDSO_TH_ALGO_X86_HVTSC VDSO_TH_ALGO_3 /* Hyper-V ref. TSC */
|
||||
#define VDSO_TH_ALGO_X86_PVCLK VDSO_TH_ALGO_4 /* KVM/XEN paravirtual clock */
|
||||
|
||||
#ifdef _KERNEL
|
||||
#ifdef COMPAT_FREEBSD32
|
||||
|
@ -31,11 +31,22 @@ __FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/bus.h>
|
||||
#include <sys/clock.h>
|
||||
#include <sys/conf.h>
|
||||
#include <sys/fcntl.h>
|
||||
#include <sys/limits.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/smp.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/vdso.h>
|
||||
|
||||
#include <vm/vm.h>
|
||||
#include <vm/pmap.h>
|
||||
|
||||
#include <machine/atomic.h>
|
||||
#include <machine/cpufunc.h>
|
||||
#include <machine/md_var.h>
|
||||
#include <machine/pvclock.h>
|
||||
|
||||
@ -54,6 +65,22 @@ static void pvclock_read_time_info(
|
||||
static void pvclock_read_wall_clock(struct pvclock_wall_clock *wc,
|
||||
struct timespec *ts);
|
||||
static u_int pvclock_tc_get_timecount(struct timecounter *tc);
|
||||
static uint32_t pvclock_tc_vdso_timehands(
|
||||
struct vdso_timehands *vdso_th, struct timecounter *tc);
|
||||
#ifdef COMPAT_FREEBSD32
|
||||
static uint32_t pvclock_tc_vdso_timehands32(
|
||||
struct vdso_timehands32 *vdso_th, struct timecounter *tc);
|
||||
#endif
|
||||
|
||||
static d_open_t pvclock_cdev_open;
|
||||
static d_mmap_t pvclock_cdev_mmap;
|
||||
|
||||
static struct cdevsw pvclock_cdev_cdevsw = {
|
||||
.d_version = D_VERSION,
|
||||
.d_name = PVCLOCK_CDEVNAME,
|
||||
.d_open = pvclock_cdev_open,
|
||||
.d_mmap = pvclock_cdev_mmap,
|
||||
};
|
||||
|
||||
void
|
||||
pvclock_resume(void)
|
||||
@ -74,57 +101,6 @@ pvclock_tsc_freq(struct pvclock_vcpu_time_info *ti)
|
||||
return (freq);
|
||||
}
|
||||
|
||||
/*
|
||||
* Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
|
||||
* yielding a 64-bit result.
|
||||
*/
|
||||
static inline uint64_t
|
||||
pvclock_scale_delta(uint64_t delta, uint32_t mul_frac, int shift)
|
||||
{
|
||||
uint64_t product;
|
||||
|
||||
if (shift < 0)
|
||||
delta >>= -shift;
|
||||
else
|
||||
delta <<= shift;
|
||||
#if defined(__i386__)
|
||||
{
|
||||
uint32_t tmp1, tmp2;
|
||||
|
||||
/**
|
||||
* For i386, the formula looks like:
|
||||
*
|
||||
* lower = (mul_frac * (delta & UINT_MAX)) >> 32
|
||||
* upper = mul_frac * (delta >> 32)
|
||||
* product = lower + upper
|
||||
*/
|
||||
__asm__ (
|
||||
"mul %5 ; "
|
||||
"mov %4,%%eax ; "
|
||||
"mov %%edx,%4 ; "
|
||||
"mul %5 ; "
|
||||
"xor %5,%5 ; "
|
||||
"add %4,%%eax ; "
|
||||
"adc %5,%%edx ; "
|
||||
: "=A" (product), "=r" (tmp1), "=r" (tmp2)
|
||||
: "a" ((uint32_t)delta), "1" ((uint32_t)(delta >> 32)),
|
||||
"2" (mul_frac) );
|
||||
}
|
||||
#elif defined(__amd64__)
|
||||
{
|
||||
unsigned long tmp;
|
||||
|
||||
__asm__ (
|
||||
"mulq %[mul_frac] ; shrd $32, %[hi], %[lo]"
|
||||
: [lo]"=a" (product), [hi]"=d" (tmp)
|
||||
: "0" (delta), [mul_frac]"rm"((uint64_t)mul_frac));
|
||||
}
|
||||
#else
|
||||
#error "pvclock: unsupported x86 architecture?"
|
||||
#endif
|
||||
return (product);
|
||||
}
|
||||
|
||||
static void
|
||||
pvclock_read_time_info(struct pvclock_vcpu_time_info *ti,
|
||||
uint64_t *ns, uint8_t *flags)
|
||||
@ -213,6 +189,27 @@ pvclock_get_wallclock(struct pvclock_wall_clock *wc, struct timespec *ts)
|
||||
pvclock_read_wall_clock(wc, ts);
|
||||
}
|
||||
|
||||
static int
|
||||
pvclock_cdev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
|
||||
{
|
||||
if (oflags & FWRITE)
|
||||
return (EPERM);
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
pvclock_cdev_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr,
|
||||
int nprot, vm_memattr_t *memattr)
|
||||
{
|
||||
if (offset >= mp_ncpus * sizeof(struct pvclock_vcpu_time_info))
|
||||
return (EINVAL);
|
||||
if (PROT_EXTRACT(nprot) != PROT_READ)
|
||||
return (EACCES);
|
||||
*paddr = vtophys((uintptr_t)dev->si_drv1 + offset);
|
||||
*memattr = VM_MEMATTR_DEFAULT;
|
||||
return (0);
|
||||
}
|
||||
|
||||
static u_int
|
||||
pvclock_tc_get_timecount(struct timecounter *tc)
|
||||
{
|
||||
@ -221,6 +218,42 @@ pvclock_tc_get_timecount(struct timecounter *tc)
|
||||
return (pvclock_getsystime(pvc) & UINT_MAX);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
pvclock_tc_vdso_timehands(struct vdso_timehands *vdso_th,
|
||||
struct timecounter *tc)
|
||||
{
|
||||
struct pvclock *pvc = tc->tc_priv;
|
||||
|
||||
vdso_th->th_algo = VDSO_TH_ALGO_X86_PVCLK;
|
||||
vdso_th->th_x86_shift = 0;
|
||||
vdso_th->th_x86_hpet_idx = 0;
|
||||
vdso_th->th_x86_pvc_last_systime =
|
||||
atomic_load_acq_64(&pvclock_last_systime);
|
||||
vdso_th->th_x86_pvc_stable_mask = !pvc->vdso_force_unstable &&
|
||||
pvc->stable_flag_supported ? PVCLOCK_FLAG_TSC_STABLE : 0;
|
||||
bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
|
||||
return (pvc->cdev != NULL && amd_feature & AMDID_RDTSCP);
|
||||
}
|
||||
|
||||
#ifdef COMPAT_FREEBSD32
|
||||
static uint32_t
|
||||
pvclock_tc_vdso_timehands32(struct vdso_timehands32 *vdso_th,
|
||||
struct timecounter *tc)
|
||||
{
|
||||
struct pvclock *pvc = tc->tc_priv;
|
||||
|
||||
vdso_th->th_algo = VDSO_TH_ALGO_X86_PVCLK;
|
||||
vdso_th->th_x86_shift = 0;
|
||||
vdso_th->th_x86_hpet_idx = 0;
|
||||
vdso_th->th_x86_pvc_last_systime =
|
||||
atomic_load_acq_64(&pvclock_last_systime);
|
||||
vdso_th->th_x86_pvc_stable_mask = !pvc->vdso_force_unstable &&
|
||||
pvc->stable_flag_supported ? PVCLOCK_FLAG_TSC_STABLE : 0;
|
||||
bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
|
||||
return (pvc->cdev != NULL && amd_feature & AMDID_RDTSCP);
|
||||
}
|
||||
#endif
|
||||
|
||||
void
|
||||
pvclock_gettime(struct pvclock *pvc, struct timespec *ts)
|
||||
{
|
||||
@ -238,9 +271,19 @@ void
|
||||
pvclock_init(struct pvclock *pvc, device_t dev, const char *tc_name,
|
||||
int tc_quality, u_int tc_flags)
|
||||
{
|
||||
struct make_dev_args mda;
|
||||
int err;
|
||||
|
||||
KASSERT(((uintptr_t)pvc->timeinfos & PAGE_MASK) == 0,
|
||||
("Specified time info page(s) address is not page-aligned."));
|
||||
|
||||
/* Set up vDSO stable-flag suppression test facility: */
|
||||
pvc->vdso_force_unstable = false;
|
||||
SYSCTL_ADD_BOOL(device_get_sysctl_ctx(dev),
|
||||
SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
|
||||
"vdso_force_unstable", CTLFLAG_RW, &pvc->vdso_force_unstable, 0,
|
||||
"Forcibly deassert stable flag in vDSO codepath");
|
||||
|
||||
/* Set up timecounter and timecounter-supporting members: */
|
||||
pvc->tc.tc_get_timecount = pvclock_tc_get_timecount;
|
||||
pvc->tc.tc_poll_pps = NULL;
|
||||
@ -250,11 +293,27 @@ pvclock_init(struct pvclock *pvc, device_t dev, const char *tc_name,
|
||||
pvc->tc.tc_quality = tc_quality;
|
||||
pvc->tc.tc_flags = tc_flags;
|
||||
pvc->tc.tc_priv = pvc;
|
||||
pvc->tc.tc_fill_vdso_timehands = NULL;
|
||||
pvc->tc.tc_fill_vdso_timehands = pvclock_tc_vdso_timehands;
|
||||
#ifdef COMPAT_FREEBSD32
|
||||
pvc->tc.tc_fill_vdso_timehands32 = NULL;
|
||||
pvc->tc.tc_fill_vdso_timehands32 = pvclock_tc_vdso_timehands32;
|
||||
#endif
|
||||
|
||||
/* Set up cdev for userspace mmapping of vCPU 0 time info page: */
|
||||
make_dev_args_init(&mda);
|
||||
mda.mda_devsw = &pvclock_cdev_cdevsw;
|
||||
mda.mda_uid = UID_ROOT;
|
||||
mda.mda_gid = GID_WHEEL;
|
||||
mda.mda_mode = 0444;
|
||||
mda.mda_si_drv1 = pvc->timeinfos;
|
||||
err = make_dev_s(&mda, &pvc->cdev, PVCLOCK_CDEVNAME);
|
||||
if (err != 0) {
|
||||
device_printf(dev, "Could not create /dev/%s, error %d. Fast "
|
||||
"time of day will be unavailable for this timecounter.\n",
|
||||
PVCLOCK_CDEVNAME, err);
|
||||
KASSERT(pvc->cdev == NULL,
|
||||
("Failed make_dev_s() unexpectedly inited cdev."));
|
||||
}
|
||||
|
||||
/* Register timecounter: */
|
||||
tc_init(&pvc->tc);
|
||||
|
||||
|
@ -870,6 +870,8 @@ x86_tsc_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc)
|
||||
vdso_th->th_algo = VDSO_TH_ALGO_X86_TSC;
|
||||
vdso_th->th_x86_shift = (int)(intptr_t)tc->tc_priv;
|
||||
vdso_th->th_x86_hpet_idx = 0xffffffff;
|
||||
vdso_th->th_x86_pvc_last_systime = 0;
|
||||
vdso_th->th_x86_pvc_stable_mask = 0;
|
||||
bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
|
||||
return (1);
|
||||
}
|
||||
@ -883,6 +885,8 @@ x86_tsc_vdso_timehands32(struct vdso_timehands32 *vdso_th32,
|
||||
vdso_th32->th_algo = VDSO_TH_ALGO_X86_TSC;
|
||||
vdso_th32->th_x86_shift = (int)(intptr_t)tc->tc_priv;
|
||||
vdso_th32->th_x86_hpet_idx = 0xffffffff;
|
||||
vdso_th32->th_x86_pvc_last_systime = 0;
|
||||
vdso_th32->th_x86_pvc_stable_mask = 0;
|
||||
bzero(vdso_th32->th_res, sizeof(vdso_th32->th_res));
|
||||
return (1);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user