kvmclock: Fix initialization when EARLY_AP_STARTUP is not defined

To attach to the hypervisor, kvmclock needs to write a per-CPU MSR.
When EARLY_AP_STARTUP is not defined, device attach happens too early:
APs are not yet spun up, so smp_rendezvous only runs the callback on the
local CPU.  As a result, the timecounter only gets initialized on the
BSP, and then timekeeping is broken on SMP systems.

Implement handling for !EARLY_AP_STARTUP kernels: keep track of the CPU
on which device attach ran, and then use a SI_SUB_SMP SYSINIT to
register the rest of the CPUs with the hypervisor.

Reported by:	Shrikanth R Kamath <kshrikanth@juniper.net>
Reviewed by:	kib, jhb (earlier versions)
Sponsored by:	Klara, Inc.
Sponsored by:	Juniper Networks, Inc.
MFC after:	2 weeks
Differential Revision:	https://reviews.freebsd.org/D37705
This commit is contained in:
Mark Johnston 2023-01-13 10:01:00 -05:00
parent 110ce09c90
commit 568f552b04

View File

@ -71,10 +71,14 @@ struct kvm_clock_softc {
struct pvclock_vcpu_time_info *timeinfos;
u_int msr_tc;
u_int msr_wc;
#ifndef EARLY_AP_STARTUP
int firstcpu;
#endif
};
static struct pvclock_wall_clock *kvm_clock_get_wallclock(void *arg);
static void kvm_clock_system_time_enable(struct kvm_clock_softc *sc);
static void kvm_clock_system_time_enable(struct kvm_clock_softc *sc,
const cpuset_t *cpus);
static void kvm_clock_system_time_enable_pcpu(void *arg);
static void kvm_clock_setup_sysctl(device_t);
@ -88,9 +92,10 @@ kvm_clock_get_wallclock(void *arg)
}
static void
kvm_clock_system_time_enable(struct kvm_clock_softc *sc)
kvm_clock_system_time_enable(struct kvm_clock_softc *sc, const cpuset_t *cpus)
{
smp_rendezvous(NULL, kvm_clock_system_time_enable_pcpu, NULL, sc);
smp_rendezvous_cpus(*cpus, NULL, kvm_clock_system_time_enable_pcpu,
NULL, sc);
}
static void
@ -104,6 +109,32 @@ kvm_clock_system_time_enable_pcpu(void *arg)
wrmsr(sc->msr_tc, vtophys(&(sc->timeinfos)[curcpu]) | 1);
}
#ifndef EARLY_AP_STARTUP
/*
 * SI_SUB_SMP hook for kernels built without EARLY_AP_STARTUP: calls
 * kvm_clock_system_time_enable() for every CPU in all_cpus other than the
 * one recorded in sc->firstcpu.  Returns without doing anything if unit 0
 * of the KVM clock devclass has no softc, or if there is only one CPU.
 */
static void
kvm_clock_init_smp(void *arg __unused)
{
	struct kvm_clock_softc *sc;
	devclass_t dc;
	cpuset_t remaining;

	dc = devclass_find(KVM_CLOCK_DEVNAME);
	sc = devclass_get_softc(dc, 0);
	if (sc == NULL)
		return;
	if (mp_ncpus == 1)
		return;

	/*
	 * kvm_clock_attach() already registered the CPU it ran on with the
	 * hypervisor, so drop that CPU from the set and register the rest.
	 */
	remaining = all_cpus;
	KASSERT(CPU_ISSET(sc->firstcpu, &remaining),
	    ("%s: invalid first CPU %d", __func__, sc->firstcpu));
	CPU_CLR(sc->firstcpu, &remaining);
	kvm_clock_system_time_enable(sc, &remaining);
}
SYSINIT(kvm_clock, SI_SUB_SMP, SI_ORDER_ANY, kvm_clock_init_smp, NULL);
#endif
static void
kvm_clock_identify(driver_t *driver, device_t parent)
{
@ -150,7 +181,12 @@ kvm_clock_attach(device_t dev)
/* Set up 'struct pvclock_vcpu_time_info' page(s): */
sc->timeinfos = kmem_malloc(mp_ncpus *
sizeof(struct pvclock_vcpu_time_info), M_WAITOK | M_ZERO);
kvm_clock_system_time_enable(sc);
#ifdef EARLY_AP_STARTUP
kvm_clock_system_time_enable(sc, &all_cpus);
#else
sc->firstcpu = curcpu;
kvm_clock_system_time_enable_pcpu(sc);
#endif
/*
* Init pvclock; register KVM clock wall clock, register KVM clock
@ -192,7 +228,7 @@ kvm_clock_resume(device_t dev)
* conservatively assume that the system time must be re-inited in
* suspend/resume scenarios.
*/
kvm_clock_system_time_enable(device_get_softc(dev));
kvm_clock_system_time_enable(device_get_softc(dev), &all_cpus);
pvclock_resume();
inittodr(time_second);
return (0);