Currently the ntptime code and resettodr() are Giant-locked.  In
particular, Giant is supposed to protect against parallel
ntp_adjtime(2) invocations.  But, for instance, sys_ntp_adjtime()
performs copyout(9) under Giant and then examines time_status to
compute the syscall result.  Since copyout(9) could sleep, the
syscall result might be inconsistent.
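
The fix, visible in the diff below, is to compute everything the
syscall return value depends on while the new ntpadj lock is held,
and to perform the possibly-sleeping copyout(9) only after the lock
is dropped.  A minimal sketch of the resulting tail of
sys_ntp_adjtime() (state updates and casts elided):

	NTPADJ_LOCK();
	/* ... apply the requested modes and fill in ntv ... */
	retval = ntp_is_time_error(time_status) ? TIME_ERROR : time_state;
	NTPADJ_UNLOCK();

	error = copyout(&ntv, uap->tp, sizeof(ntv));
	if (error == 0)
		td->td_retval[0] = retval;
	return (error);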

Another, more important, issue is that if PPS is configured,
hardpps(9) is executed without any protection against parallel
invocation of the top-level code.  Potentially this may leave the
ntptime state variables inconsistent, although I cannot say how
serious such distortion is.  In the pre-SMP era, the now
non-functional splclock() call in sys_ntp_adjtime() protected against
clock interrupts calling hardpps().
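
With the new lock, hardpps() takes the lock on entry and every early
return becomes a jump to a common exit label, so the lock is dropped
on all paths.  Roughly (the PPS filtering and calibration steps are
elided and "reject" is only a placeholder for the actual tests):

	void
	hardpps(struct timespec *tsp, long nsec)
	{

		NTPADJ_LOCK();
		/*
		 * Range gate, frequency discriminator, median filter,
		 * jitter/wander checks and calibration interval handling
		 * go here; any rejection that used to "return" now does
		 * "goto out".
		 */
		if (reject)
			goto out;
		/* Update pps_freq and, when STA_PPSFREQ is set, time_freq. */
	out:
		NTPADJ_UNLOCK();
	}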

Modernize the locking: a mutex now protects the ntptime data.
Because the hardpps() KPI is legitimately called from interrupt
filters (uart(4), for example, calls it from its filter), the lock
cannot be a default sleep mutex when PPS_SYNC is defined; a spin
mutex is used instead.  Otherwise a default mutex is used, to reduce
interrupt latency.
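
Concretely, the lock type is selected at compile time.  An interrupt
filter may take a spin mutex (which disables interrupts while held)
but must never block on a default mutex, hence the PPS_SYNC
conditional; the excerpt below mirrors the first hunk of the diff:

	static struct mtx ntpadj_lock;
	MTX_SYSINIT(ntpadj, &ntpadj_lock, "ntpadj",
	#ifdef PPS_SYNC
	    MTX_SPIN
	#else
	    MTX_DEF
	#endif
	);

	#ifdef PPS_SYNC
	#define NTPADJ_LOCK()	mtx_lock_spin(&ntpadj_lock)
	#define NTPADJ_UNLOCK()	mtx_unlock_spin(&ntpadj_lock)
	#else
	#define NTPADJ_LOCK()	mtx_lock(&ntpadj_lock)
	#define NTPADJ_UNLOCK()	mtx_unlock(&ntpadj_lock)
	#endif
	#define NTPADJ_ASSERT_LOCKED()	mtx_assert(&ntpadj_lock, MA_OWNED)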

Reviewed by:	  imp, jhb
Sponsored by:	  The FreeBSD Foundation
Approved by:	  re (gjb)
Differential revision:	https://reviews.freebsd.org/D6825
Konstantin Belousov 2016-06-28 16:43:23 +00:00
parent 3a0e6f920a
commit 364c516cff
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=302252


@@ -162,6 +162,30 @@ static l_fp time_adj; /* tick adjust (ns/s) */
static int64_t time_adjtime; /* correction from adjtime(2) (usec) */
static struct mtx ntpadj_lock;
MTX_SYSINIT(ntpadj, &ntpadj_lock, "ntpadj",
#ifdef PPS_SYNC
MTX_SPIN
#else
MTX_DEF
#endif
);
/*
* When PPS_SYNC is defined, hardpps() function is provided which can
* be legitimately called from interrupt filters. Due to this, use
* spinlock for ntptime state protection, otherwise sleepable mutex is
* adequate.
*/
#ifdef PPS_SYNC
#define NTPADJ_LOCK() mtx_lock_spin(&ntpadj_lock)
#define NTPADJ_UNLOCK() mtx_unlock_spin(&ntpadj_lock)
#else
#define NTPADJ_LOCK() mtx_lock(&ntpadj_lock)
#define NTPADJ_UNLOCK() mtx_unlock(&ntpadj_lock)
#endif
#define NTPADJ_ASSERT_LOCKED() mtx_assert(&ntpadj_lock, MA_OWNED)
#ifdef PPS_SYNC
/*
* The following variables are used when a pulse-per-second (PPS) signal
@@ -203,11 +227,12 @@ static long pps_errcnt; /* calibration errors */
static void ntp_init(void);
static void hardupdate(long offset);
static void ntp_gettime1(struct ntptimeval *ntvp);
static int ntp_is_time_error(void);
static bool ntp_is_time_error(int tsl);
static int
ntp_is_time_error(void)
static bool
ntp_is_time_error(int tsl)
{
/*
* Status word error decode. If any of these conditions occur,
* an error is returned, instead of the status word. Most
@@ -216,30 +241,29 @@ ntp_is_time_error(void)
*
* Hardware or software error
*/
if ((time_status & (STA_UNSYNC | STA_CLOCKERR)) ||
if ((tsl & (STA_UNSYNC | STA_CLOCKERR)) ||
/*
* PPS signal lost when either time or frequency synchronization
* requested
*/
(time_status & (STA_PPSFREQ | STA_PPSTIME) &&
!(time_status & STA_PPSSIGNAL)) ||
(tsl & (STA_PPSFREQ | STA_PPSTIME) &&
!(tsl & STA_PPSSIGNAL)) ||
/*
* PPS jitter exceeded when time synchronization requested
*/
(time_status & STA_PPSTIME &&
time_status & STA_PPSJITTER) ||
(tsl & STA_PPSTIME && tsl & STA_PPSJITTER) ||
/*
* PPS wander exceeded or calibration error when frequency
* synchronization requested
*/
(time_status & STA_PPSFREQ &&
time_status & (STA_PPSWANDER | STA_PPSERROR)))
return (1);
(tsl & STA_PPSFREQ &&
tsl & (STA_PPSWANDER | STA_PPSERROR)))
return (true);
return (0);
return (false);
}
static void
@@ -247,7 +271,7 @@ ntp_gettime1(struct ntptimeval *ntvp)
{
struct timespec atv; /* nanosecond time */
GIANT_REQUIRED;
NTPADJ_ASSERT_LOCKED();
nanotime(&atv);
ntvp->time.tv_sec = atv.tv_sec;
@@ -257,7 +281,7 @@ ntp_gettime1(struct ntptimeval *ntvp)
ntvp->tai = time_tai;
ntvp->time_state = time_state;
if (ntp_is_time_error())
if (ntp_is_time_error(time_status))
ntvp->time_state = TIME_ERROR;
}
@@ -278,9 +302,9 @@ sys_ntp_gettime(struct thread *td, struct ntp_gettime_args *uap)
{
struct ntptimeval ntv;
mtx_lock(&Giant);
NTPADJ_LOCK();
ntp_gettime1(&ntv);
mtx_unlock(&Giant);
NTPADJ_UNLOCK();
td->td_retval[0] = ntv.time_state;
return (copyout(&ntv, uap->ntvp, sizeof(ntv)));
@@ -291,14 +315,17 @@ ntp_sysctl(SYSCTL_HANDLER_ARGS)
{
struct ntptimeval ntv; /* temporary structure */
NTPADJ_LOCK();
ntp_gettime1(&ntv);
NTPADJ_UNLOCK();
return (sysctl_handle_opaque(oidp, &ntv, sizeof(ntv), req));
}
SYSCTL_NODE(_kern, OID_AUTO, ntp_pll, CTLFLAG_RW, 0, "");
SYSCTL_PROC(_kern_ntp_pll, OID_AUTO, gettime, CTLTYPE_OPAQUE|CTLFLAG_RD,
0, sizeof(struct ntptimeval) , ntp_sysctl, "S,ntptimeval", "");
SYSCTL_PROC(_kern_ntp_pll, OID_AUTO, gettime, CTLTYPE_OPAQUE | CTLFLAG_RD |
CTLFLAG_MPSAFE, 0, sizeof(struct ntptimeval) , ntp_sysctl, "S,ntptimeval",
"");
#ifdef PPS_SYNC
SYSCTL_INT(_kern_ntp_pll, OID_AUTO, pps_shiftmax, CTLFLAG_RW,
@@ -308,10 +335,12 @@ SYSCTL_INT(_kern_ntp_pll, OID_AUTO, pps_shift, CTLFLAG_RW,
SYSCTL_LONG(_kern_ntp_pll, OID_AUTO, time_monitor, CTLFLAG_RD,
&time_monitor, 0, "Last time offset scaled (ns)");
SYSCTL_OPAQUE(_kern_ntp_pll, OID_AUTO, pps_freq, CTLFLAG_RD,
&pps_freq, sizeof(pps_freq), "I", "Scaled frequency offset (ns/sec)");
SYSCTL_OPAQUE(_kern_ntp_pll, OID_AUTO, time_freq, CTLFLAG_RD,
&time_freq, sizeof(time_freq), "I", "Frequency offset (ns/sec)");
SYSCTL_S64(_kern_ntp_pll, OID_AUTO, pps_freq, CTLFLAG_RD | CTLFLAG_MPSAFE,
&pps_freq, 0,
"Scaled frequency offset (ns/sec)");
SYSCTL_S64(_kern_ntp_pll, OID_AUTO, time_freq, CTLFLAG_RD | CTLFLAG_MPSAFE,
&time_freq, 0,
"Frequency offset (ns/sec)");
#endif
/*
@@ -333,12 +362,11 @@ sys_ntp_adjtime(struct thread *td, struct ntp_adjtime_args *uap)
struct timex ntv; /* temporary structure */
long freq; /* frequency ns/s) */
int modes; /* mode bits from structure */
int s; /* caller priority */
int error;
int error, retval;
error = copyin((caddr_t)uap->tp, (caddr_t)&ntv, sizeof(ntv));
if (error)
return(error);
return (error);
/*
* Update selected clock variables - only the superuser can
@@ -349,13 +377,12 @@ sys_ntp_adjtime(struct thread *td, struct ntp_adjtime_args *uap)
* the STA_PLL bit in the status word is cleared, the state and
* status words are reset to the initial values at boot.
*/
mtx_lock(&Giant);
modes = ntv.modes;
if (modes)
error = priv_check(td, PRIV_NTP_ADJTIME);
if (error)
goto done2;
s = splclock();
if (error != 0)
return (error);
NTPADJ_LOCK();
if (modes & MOD_MAXERROR)
time_maxerror = ntv.maxerror;
if (modes & MOD_ESTERROR)
@@ -456,19 +483,12 @@ sys_ntp_adjtime(struct thread *td, struct ntp_adjtime_args *uap)
ntv.jitcnt = pps_jitcnt;
ntv.stbcnt = pps_stbcnt;
#endif /* PPS_SYNC */
splx(s);
retval = ntp_is_time_error(time_status) ? TIME_ERROR : time_state;
NTPADJ_UNLOCK();
error = copyout((caddr_t)&ntv, (caddr_t)uap->tp, sizeof(ntv));
if (error)
goto done2;
if (ntp_is_time_error())
td->td_retval[0] = TIME_ERROR;
else
td->td_retval[0] = time_state;
done2:
mtx_unlock(&Giant);
if (error == 0)
td->td_retval[0] = retval;
return (error);
}
@@ -620,7 +640,7 @@ ntp_update_second(int64_t *adjustment, time_t *newsec)
* probably be integrated with the code that does that.
*/
static void
ntp_init()
ntp_init(void)
{
/*
@@ -670,6 +690,8 @@ hardupdate(offset)
long mtemp;
l_fp ftemp;
NTPADJ_ASSERT_LOCKED();
/*
* Select how the phase is to be controlled and from which
* source. If the PPS signal is present and enabled to
@@ -750,6 +772,8 @@ hardpps(tsp, nsec)
long u_sec, u_nsec, v_nsec; /* temps */
l_fp ftemp;
NTPADJ_LOCK();
/*
* The signal is first processed by a range gate and frequency
* discriminator. The range gate rejects noise spikes outside
@@ -770,9 +794,8 @@ hardpps(tsp, nsec)
u_sec++;
}
v_nsec = u_nsec - pps_tf[0].tv_nsec;
if (u_sec == pps_tf[0].tv_sec && v_nsec < NANOSECOND -
MAXFREQ)
return;
if (u_sec == pps_tf[0].tv_sec && v_nsec < NANOSECOND - MAXFREQ)
goto out;
pps_tf[2] = pps_tf[1];
pps_tf[1] = pps_tf[0];
pps_tf[0].tv_sec = u_sec;
@@ -793,7 +816,7 @@ hardpps(tsp, nsec)
u_nsec += NANOSECOND;
pps_fcount += u_nsec;
if (v_nsec > MAXFREQ || v_nsec < -MAXFREQ)
return;
goto out;
time_status &= ~STA_PPSJITTER;
/*
@@ -839,7 +862,7 @@ hardpps(tsp, nsec)
* timecounter running faster than 1 GHz the lower bound is 2ns, just
* to avoid a nonsensical threshold of zero.
*/
if (u_nsec > lmax(pps_jitter << PPS_POPCORN,
if (u_nsec > lmax(pps_jitter << PPS_POPCORN,
2 * (NANOSECOND / (long)qmin(NANOSECOND, tc_getfrequency())))) {
time_status |= STA_PPSJITTER;
pps_jitcnt++;
@@ -850,7 +873,7 @@ hardpps(tsp, nsec)
pps_jitter += (u_nsec - pps_jitter) >> PPS_FAVG;
u_sec = pps_tf[0].tv_sec - pps_lastsec;
if (u_sec < (1 << pps_shift))
return;
goto out;
/*
* At the end of the calibration interval the difference between
@@ -867,11 +890,10 @@ hardpps(tsp, nsec)
pps_lastsec = pps_tf[0].tv_sec;
pps_fcount = 0;
u_nsec = MAXFREQ << pps_shift;
if (v_nsec > u_nsec || v_nsec < -u_nsec || u_sec != (1 <<
pps_shift)) {
if (v_nsec > u_nsec || v_nsec < -u_nsec || u_sec != (1 << pps_shift)) {
time_status |= STA_PPSERROR;
pps_errcnt++;
return;
goto out;
}
/*
@@ -932,6 +954,9 @@ hardpps(tsp, nsec)
L_LINT(pps_freq, -MAXFREQ);
if (time_status & STA_PPSFREQ)
time_freq = pps_freq;
out:
NTPADJ_UNLOCK();
}
#endif /* PPS_SYNC */
@@ -965,27 +990,29 @@ int
kern_adjtime(struct thread *td, struct timeval *delta, struct timeval *olddelta)
{
struct timeval atv;
int64_t ltr, ltw;
int error;
mtx_lock(&Giant);
if (olddelta) {
atv.tv_sec = time_adjtime / 1000000;
atv.tv_usec = time_adjtime % 1000000;
if (delta != NULL) {
error = priv_check(td, PRIV_ADJTIME);
if (error != 0)
return (error);
ltw = (int64_t)delta->tv_sec * 1000000 + delta->tv_usec;
}
NTPADJ_LOCK();
ltr = time_adjtime;
if (delta != NULL)
time_adjtime = ltw;
NTPADJ_UNLOCK();
if (olddelta != NULL) {
atv.tv_sec = ltr / 1000000;
atv.tv_usec = ltr % 1000000;
if (atv.tv_usec < 0) {
atv.tv_usec += 1000000;
atv.tv_sec--;
}
*olddelta = atv;
}
if (delta) {
if ((error = priv_check(td, PRIV_ADJTIME))) {
mtx_unlock(&Giant);
return (error);
}
time_adjtime = (int64_t)delta->tv_sec * 1000000 +
delta->tv_usec;
}
mtx_unlock(&Giant);
return (0);
}
@@ -996,11 +1023,12 @@ static void
periodic_resettodr(void *arg __unused)
{
if (!ntp_is_time_error()) {
mtx_lock(&Giant);
/*
* Read of time_status is lock-less, which is fine since
* ntp_is_time_error() operates on the consistent read value.
*/
if (!ntp_is_time_error(time_status))
resettodr();
mtx_unlock(&Giant);
}
if (resettodr_period > 0)
callout_schedule(&resettodr_callout, resettodr_period * hz);
}
@@ -1010,11 +1038,9 @@ shutdown_resettodr(void *arg __unused, int howto __unused)
{
callout_drain(&resettodr_callout);
if (resettodr_period > 0 && !ntp_is_time_error()) {
mtx_lock(&Giant);
/* Another unlocked read of time_status */
if (resettodr_period > 0 && !ntp_is_time_error(time_status))
resettodr();
mtx_unlock(&Giant);
}
}
static int
@@ -1036,9 +1062,9 @@ sysctl_resettodr_period(SYSCTL_HANDLER_ARGS)
return (0);
}
SYSCTL_PROC(_machdep, OID_AUTO, rtc_save_period, CTLTYPE_INT|CTLFLAG_RWTUN,
&resettodr_period, 1800, sysctl_resettodr_period, "I",
"Save system time to RTC with this period (in seconds)");
SYSCTL_PROC(_machdep, OID_AUTO, rtc_save_period, CTLTYPE_INT | CTLFLAG_RWTUN |
CTLFLAG_MPSAFE, &resettodr_period, 1800, sysctl_resettodr_period, "I",
"Save system time to RTC with this period (in seconds)");
static void
start_periodic_resettodr(void *arg __unused)