a0b8ff0c54
directly accessed. Although this will work on some platforms, it can throw an exception if the pointer is invalid and then panic the kernel. Add a missing SYSCTL_IN() of "SCTP_BASE_STATS" structure. MFC after: 3 days Sponsored by: Mellanox Technologies
483 lines
12 KiB
C
483 lines
12 KiB
C
/*-
|
|
* Copyright (c) 2011 The University of Melbourne
|
|
* All rights reserved.
|
|
*
|
|
* This software was developed by Julien Ridoux at the University of Melbourne
|
|
* under sponsorship from the FreeBSD Foundation.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
#include "opt_ffclock.h"
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/bus.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/lock.h>
|
|
#include <sys/module.h>
|
|
#include <sys/mutex.h>
|
|
#include <sys/priv.h>
|
|
#include <sys/proc.h>
|
|
#include <sys/sbuf.h>
|
|
#include <sys/sysent.h>
|
|
#include <sys/sysproto.h>
|
|
#include <sys/sysctl.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/timeffc.h>
|
|
|
|
#ifdef FFCLOCK
|
|
|
|
FEATURE(ffclock, "Feed-forward clock support");
|
|
|
|
extern struct ffclock_estimate ffclock_estimate;
|
|
extern struct bintime ffclock_boottime;
|
|
extern int8_t ffclock_updated;
|
|
extern struct mtx ffclock_mtx;
|
|
|
|
/*
|
|
* Feed-forward clock absolute time. This should be the preferred way to read
|
|
* the feed-forward clock for "wall-clock" type time. The flags allow to compose
|
|
* various flavours of absolute time (e.g. with or without leap seconds taken
|
|
* into account). If valid pointers are provided, the ffcounter value and an
|
|
* upper bound on clock error associated with the bintime are provided.
|
|
* NOTE: use ffclock_convert_abs() to differ the conversion of a ffcounter value
|
|
* read earlier.
|
|
*/
|
|
void
|
|
ffclock_abstime(ffcounter *ffcount, struct bintime *bt,
|
|
struct bintime *error_bound, uint32_t flags)
|
|
{
|
|
struct ffclock_estimate cest;
|
|
ffcounter ffc;
|
|
ffcounter update_ffcount;
|
|
ffcounter ffdelta_error;
|
|
|
|
/* Get counter and corresponding time. */
|
|
if ((flags & FFCLOCK_FAST) == FFCLOCK_FAST)
|
|
ffclock_last_tick(&ffc, bt, flags);
|
|
else {
|
|
ffclock_read_counter(&ffc);
|
|
ffclock_convert_abs(ffc, bt, flags);
|
|
}
|
|
|
|
/* Current ffclock estimate, use update_ffcount as generation number. */
|
|
do {
|
|
update_ffcount = ffclock_estimate.update_ffcount;
|
|
bcopy(&ffclock_estimate, &cest, sizeof(struct ffclock_estimate));
|
|
} while (update_ffcount != ffclock_estimate.update_ffcount);
|
|
|
|
/*
|
|
* Leap second adjustment. Total as seen by synchronisation algorithm
|
|
* since it started. cest.leapsec_next is the ffcounter prediction of
|
|
* when the next leapsecond occurs.
|
|
*/
|
|
if ((flags & FFCLOCK_LEAPSEC) == FFCLOCK_LEAPSEC) {
|
|
bt->sec -= cest.leapsec_total;
|
|
if (ffc > cest.leapsec_next)
|
|
bt->sec -= cest.leapsec;
|
|
}
|
|
|
|
/* Boot time adjustment, for uptime/monotonic clocks. */
|
|
if ((flags & FFCLOCK_UPTIME) == FFCLOCK_UPTIME) {
|
|
bintime_sub(bt, &ffclock_boottime);
|
|
}
|
|
|
|
/* Compute error bound if a valid pointer has been passed. */
|
|
if (error_bound) {
|
|
ffdelta_error = ffc - cest.update_ffcount;
|
|
ffclock_convert_diff(ffdelta_error, error_bound);
|
|
/* 18446744073709 = int(2^64/1e12), err_bound_rate in [ps/s] */
|
|
bintime_mul(error_bound, cest.errb_rate *
|
|
(uint64_t)18446744073709LL);
|
|
/* 18446744073 = int(2^64 / 1e9), since err_abs in [ns] */
|
|
bintime_addx(error_bound, cest.errb_abs *
|
|
(uint64_t)18446744073LL);
|
|
}
|
|
|
|
if (ffcount)
|
|
*ffcount = ffc;
|
|
}
|
|
|
|
/*
|
|
* Feed-forward difference clock. This should be the preferred way to convert a
|
|
* time interval in ffcounter values into a time interval in seconds. If a valid
|
|
* pointer is passed, an upper bound on the error in computing the time interval
|
|
* in seconds is provided.
|
|
*/
|
|
void
|
|
ffclock_difftime(ffcounter ffdelta, struct bintime *bt,
|
|
struct bintime *error_bound)
|
|
{
|
|
ffcounter update_ffcount;
|
|
uint32_t err_rate;
|
|
|
|
ffclock_convert_diff(ffdelta, bt);
|
|
|
|
if (error_bound) {
|
|
do {
|
|
update_ffcount = ffclock_estimate.update_ffcount;
|
|
err_rate = ffclock_estimate.errb_rate;
|
|
} while (update_ffcount != ffclock_estimate.update_ffcount);
|
|
|
|
ffclock_convert_diff(ffdelta, error_bound);
|
|
/* 18446744073709 = int(2^64/1e12), err_bound_rate in [ps/s] */
|
|
bintime_mul(error_bound, err_rate * (uint64_t)18446744073709LL);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Create a new kern.sysclock sysctl node, which will be home to some generic
|
|
* sysclock configuration variables. Feed-forward clock specific variables will
|
|
* live under the ffclock subnode.
|
|
*/
|
|
|
|
SYSCTL_NODE(_kern, OID_AUTO, sysclock, CTLFLAG_RW, 0,
|
|
"System clock related configuration");
|
|
SYSCTL_NODE(_kern_sysclock, OID_AUTO, ffclock, CTLFLAG_RW, 0,
|
|
"Feed-forward clock configuration");
|
|
|
|
static char *sysclocks[] = {"feedback", "feed-forward"};
|
|
#define MAX_SYSCLOCK_NAME_LEN 16
|
|
#define NUM_SYSCLOCKS (sizeof(sysclocks) / sizeof(*sysclocks))
|
|
|
|
static int ffclock_version = 2;
|
|
SYSCTL_INT(_kern_sysclock_ffclock, OID_AUTO, version, CTLFLAG_RD,
|
|
&ffclock_version, 0, "Feed-forward clock kernel version");
|
|
|
|
/* List available sysclocks. */
|
|
static int
|
|
sysctl_kern_sysclock_available(SYSCTL_HANDLER_ARGS)
|
|
{
|
|
struct sbuf *s;
|
|
int clk, error;
|
|
|
|
s = sbuf_new_for_sysctl(NULL, NULL,
|
|
MAX_SYSCLOCK_NAME_LEN * NUM_SYSCLOCKS, req);
|
|
if (s == NULL)
|
|
return (ENOMEM);
|
|
|
|
for (clk = 0; clk < NUM_SYSCLOCKS; clk++) {
|
|
sbuf_cat(s, sysclocks[clk]);
|
|
if (clk + 1 < NUM_SYSCLOCKS)
|
|
sbuf_cat(s, " ");
|
|
}
|
|
error = sbuf_finish(s);
|
|
sbuf_delete(s);
|
|
|
|
return (error);
|
|
}
|
|
|
|
SYSCTL_PROC(_kern_sysclock, OID_AUTO, available, CTLTYPE_STRING | CTLFLAG_RD,
|
|
0, 0, sysctl_kern_sysclock_available, "A",
|
|
"List of available system clocks");
|
|
|
|
/*
|
|
* Return the name of the active system clock if read, or attempt to change
|
|
* the active system clock to the user specified one if written to. The active
|
|
* system clock is read when calling any of the [get]{bin,nano,micro}[up]time()
|
|
* functions.
|
|
*/
|
|
static int
|
|
sysctl_kern_sysclock_active(SYSCTL_HANDLER_ARGS)
|
|
{
|
|
char newclock[MAX_SYSCLOCK_NAME_LEN];
|
|
int error;
|
|
int clk;
|
|
|
|
/* Return the name of the current active sysclock. */
|
|
strlcpy(newclock, sysclocks[sysclock_active], sizeof(newclock));
|
|
error = sysctl_handle_string(oidp, newclock, sizeof(newclock), req);
|
|
|
|
/* Check for error or no change */
|
|
if (error != 0 || req->newptr == NULL)
|
|
goto done;
|
|
|
|
/* Change the active sysclock to the user specified one: */
|
|
error = EINVAL;
|
|
for (clk = 0; clk < NUM_SYSCLOCKS; clk++) {
|
|
if (strncmp(newclock, sysclocks[clk],
|
|
MAX_SYSCLOCK_NAME_LEN - 1)) {
|
|
continue;
|
|
}
|
|
sysclock_active = clk;
|
|
error = 0;
|
|
break;
|
|
}
|
|
done:
|
|
return (error);
|
|
}
|
|
|
|
SYSCTL_PROC(_kern_sysclock, OID_AUTO, active, CTLTYPE_STRING | CTLFLAG_RW,
|
|
0, 0, sysctl_kern_sysclock_active, "A",
|
|
"Name of the active system clock which is currently serving time");
|
|
|
|
static int sysctl_kern_ffclock_ffcounter_bypass = 0;
|
|
SYSCTL_INT(_kern_sysclock_ffclock, OID_AUTO, ffcounter_bypass, CTLFLAG_RW,
|
|
&sysctl_kern_ffclock_ffcounter_bypass, 0,
|
|
"Use reliable hardware timecounter as the feed-forward counter");
|
|
|
|
/*
|
|
* High level functions to access the Feed-Forward Clock.
|
|
*/
|
|
void
|
|
ffclock_bintime(struct bintime *bt)
|
|
{
|
|
|
|
ffclock_abstime(NULL, bt, NULL, FFCLOCK_LERP | FFCLOCK_LEAPSEC);
|
|
}
|
|
|
|
void
|
|
ffclock_nanotime(struct timespec *tsp)
|
|
{
|
|
struct bintime bt;
|
|
|
|
ffclock_abstime(NULL, &bt, NULL, FFCLOCK_LERP | FFCLOCK_LEAPSEC);
|
|
bintime2timespec(&bt, tsp);
|
|
}
|
|
|
|
void
|
|
ffclock_microtime(struct timeval *tvp)
|
|
{
|
|
struct bintime bt;
|
|
|
|
ffclock_abstime(NULL, &bt, NULL, FFCLOCK_LERP | FFCLOCK_LEAPSEC);
|
|
bintime2timeval(&bt, tvp);
|
|
}
|
|
|
|
void
|
|
ffclock_getbintime(struct bintime *bt)
|
|
{
|
|
|
|
ffclock_abstime(NULL, bt, NULL,
|
|
FFCLOCK_LERP | FFCLOCK_LEAPSEC | FFCLOCK_FAST);
|
|
}
|
|
|
|
void
|
|
ffclock_getnanotime(struct timespec *tsp)
|
|
{
|
|
struct bintime bt;
|
|
|
|
ffclock_abstime(NULL, &bt, NULL,
|
|
FFCLOCK_LERP | FFCLOCK_LEAPSEC | FFCLOCK_FAST);
|
|
bintime2timespec(&bt, tsp);
|
|
}
|
|
|
|
void
|
|
ffclock_getmicrotime(struct timeval *tvp)
|
|
{
|
|
struct bintime bt;
|
|
|
|
ffclock_abstime(NULL, &bt, NULL,
|
|
FFCLOCK_LERP | FFCLOCK_LEAPSEC | FFCLOCK_FAST);
|
|
bintime2timeval(&bt, tvp);
|
|
}
|
|
|
|
void
|
|
ffclock_binuptime(struct bintime *bt)
|
|
{
|
|
|
|
ffclock_abstime(NULL, bt, NULL, FFCLOCK_LERP | FFCLOCK_UPTIME);
|
|
}
|
|
|
|
void
|
|
ffclock_nanouptime(struct timespec *tsp)
|
|
{
|
|
struct bintime bt;
|
|
|
|
ffclock_abstime(NULL, &bt, NULL, FFCLOCK_LERP | FFCLOCK_UPTIME);
|
|
bintime2timespec(&bt, tsp);
|
|
}
|
|
|
|
void
|
|
ffclock_microuptime(struct timeval *tvp)
|
|
{
|
|
struct bintime bt;
|
|
|
|
ffclock_abstime(NULL, &bt, NULL, FFCLOCK_LERP | FFCLOCK_UPTIME);
|
|
bintime2timeval(&bt, tvp);
|
|
}
|
|
|
|
void
|
|
ffclock_getbinuptime(struct bintime *bt)
|
|
{
|
|
|
|
ffclock_abstime(NULL, bt, NULL,
|
|
FFCLOCK_LERP | FFCLOCK_UPTIME | FFCLOCK_FAST);
|
|
}
|
|
|
|
void
|
|
ffclock_getnanouptime(struct timespec *tsp)
|
|
{
|
|
struct bintime bt;
|
|
|
|
ffclock_abstime(NULL, &bt, NULL,
|
|
FFCLOCK_LERP | FFCLOCK_UPTIME | FFCLOCK_FAST);
|
|
bintime2timespec(&bt, tsp);
|
|
}
|
|
|
|
void
|
|
ffclock_getmicrouptime(struct timeval *tvp)
|
|
{
|
|
struct bintime bt;
|
|
|
|
ffclock_abstime(NULL, &bt, NULL,
|
|
FFCLOCK_LERP | FFCLOCK_UPTIME | FFCLOCK_FAST);
|
|
bintime2timeval(&bt, tvp);
|
|
}
|
|
|
|
void
|
|
ffclock_bindifftime(ffcounter ffdelta, struct bintime *bt)
|
|
{
|
|
|
|
ffclock_difftime(ffdelta, bt, NULL);
|
|
}
|
|
|
|
void
|
|
ffclock_nanodifftime(ffcounter ffdelta, struct timespec *tsp)
|
|
{
|
|
struct bintime bt;
|
|
|
|
ffclock_difftime(ffdelta, &bt, NULL);
|
|
bintime2timespec(&bt, tsp);
|
|
}
|
|
|
|
void
|
|
ffclock_microdifftime(ffcounter ffdelta, struct timeval *tvp)
|
|
{
|
|
struct bintime bt;
|
|
|
|
ffclock_difftime(ffdelta, &bt, NULL);
|
|
bintime2timeval(&bt, tvp);
|
|
}
|
|
|
|
/*
|
|
* System call allowing userland applications to retrieve the current value of
|
|
* the Feed-Forward Clock counter.
|
|
*/
|
|
#ifndef _SYS_SYSPROTO_H_
|
|
struct ffclock_getcounter_args {
|
|
ffcounter *ffcount;
|
|
};
|
|
#endif
|
|
/* ARGSUSED */
|
|
int
|
|
sys_ffclock_getcounter(struct thread *td, struct ffclock_getcounter_args *uap)
|
|
{
|
|
ffcounter ffcount;
|
|
int error;
|
|
|
|
ffcount = 0;
|
|
ffclock_read_counter(&ffcount);
|
|
if (ffcount == 0)
|
|
return (EAGAIN);
|
|
error = copyout(&ffcount, uap->ffcount, sizeof(ffcounter));
|
|
|
|
return (error);
|
|
}
|
|
|
|
/*
|
|
* System call allowing the synchronisation daemon to push new feed-foward clock
|
|
* estimates to the kernel. Acquire ffclock_mtx to prevent concurrent updates
|
|
* and ensure data consistency.
|
|
* NOTE: ffclock_updated signals the fftimehands that new estimates are
|
|
* available. The updated estimates are picked up by the fftimehands on next
|
|
* tick, which could take as long as 1/hz seconds (if ticks are not missed).
|
|
*/
|
|
#ifndef _SYS_SYSPROTO_H_
|
|
struct ffclock_setestimate_args {
|
|
struct ffclock_estimate *cest;
|
|
};
|
|
#endif
|
|
/* ARGSUSED */
|
|
int
|
|
sys_ffclock_setestimate(struct thread *td, struct ffclock_setestimate_args *uap)
|
|
{
|
|
struct ffclock_estimate cest;
|
|
int error;
|
|
|
|
/* Reuse of PRIV_CLOCK_SETTIME. */
|
|
if ((error = priv_check(td, PRIV_CLOCK_SETTIME)) != 0)
|
|
return (error);
|
|
|
|
if ((error = copyin(uap->cest, &cest, sizeof(struct ffclock_estimate)))
|
|
!= 0)
|
|
return (error);
|
|
|
|
mtx_lock(&ffclock_mtx);
|
|
memcpy(&ffclock_estimate, &cest, sizeof(struct ffclock_estimate));
|
|
ffclock_updated++;
|
|
mtx_unlock(&ffclock_mtx);
|
|
return (error);
|
|
}
|
|
|
|
/*
|
|
* System call allowing userland applications to retrieve the clock estimates
|
|
* stored within the kernel. It is useful to kickstart the synchronisation
|
|
* daemon with the kernel's knowledge of hardware timecounter.
|
|
*/
|
|
#ifndef _SYS_SYSPROTO_H_
|
|
struct ffclock_getestimate_args {
|
|
struct ffclock_estimate *cest;
|
|
};
|
|
#endif
|
|
/* ARGSUSED */
|
|
int
|
|
sys_ffclock_getestimate(struct thread *td, struct ffclock_getestimate_args *uap)
|
|
{
|
|
struct ffclock_estimate cest;
|
|
int error;
|
|
|
|
mtx_lock(&ffclock_mtx);
|
|
memcpy(&cest, &ffclock_estimate, sizeof(struct ffclock_estimate));
|
|
mtx_unlock(&ffclock_mtx);
|
|
error = copyout(&cest, uap->cest, sizeof(struct ffclock_estimate));
|
|
return (error);
|
|
}
|
|
|
|
#else /* !FFCLOCK */
|
|
|
|
int
|
|
sys_ffclock_getcounter(struct thread *td, struct ffclock_getcounter_args *uap)
|
|
{
|
|
|
|
return (ENOSYS);
|
|
}
|
|
|
|
int
|
|
sys_ffclock_setestimate(struct thread *td, struct ffclock_setestimate_args *uap)
|
|
{
|
|
|
|
return (ENOSYS);
|
|
}
|
|
|
|
int
|
|
sys_ffclock_getestimate(struct thread *td, struct ffclock_getestimate_args *uap)
|
|
{
|
|
|
|
return (ENOSYS);
|
|
}
|
|
|
|
#endif /* FFCLOCK */
|