Implement simple machine check support for amd64 and i386.

- For CPUs that only support MCE (the machine check exception) but not MCA
  (i.e. Pentium), all this does is print out the value of the machine check
  registers and then panic when a machine check exception occurs.
- For CPUs that support MCA (the machine check architecture), the support is
  a bit more involved.
  - First, there is limited support for decoding the CPU-independent MCA
    error codes in the kernel, and the kernel uses this to output a short
    description of any machine check events that occur.
  - When a machine check exception occurs, all of the MCx banks on the
    current CPU are scanned and any events are reported to the console
    before panicking.
  - To catch events for correctable errors, a periodic timer kicks off a
    task which scans the MCx banks on all CPUs.  The frequency of these
    checks is controlled via the "hw.mca.interval" sysctl.
  - Userland can request an immediate scan of the MCx banks by writing
    a non-zero value to "hw.mca.force_scan".
  - If any correctable events are encountered, the appropriate details
    are stored in a 'struct mca_record' (defined in <machine/mca.h>).
    The "hw.mca.count" is a count of such records and each record may
    be queried via the "hw.mca.records" tree by specifying the record
    index (0 .. count - 1) as the next name in the MIB similar to using
    PIDs with the kern.proc.* sysctls.  The idea is to export machine
    check events to userland for more detailed processing.
  - The periodic timer and hw.mca sysctls are only present if the CPU
    supports MCA.

Discussed with:	emaste (briefly)
MFC after:	1 month
This commit is contained in:
jhb 2009-05-13 17:53:04 +00:00
parent deae02e77d
commit 370298a108
14 changed files with 1238 additions and 0 deletions

View File

@ -109,6 +109,7 @@ __FBSDID("$FreeBSD$");
#include <machine/cpu.h>
#include <machine/cputypes.h>
#include <machine/intr_machdep.h>
#include <machine/mca.h>
#include <machine/md_var.h>
#include <machine/metadata.h>
#include <machine/pc/bios.h>
@ -274,6 +275,7 @@ cpu_startup(dummy)
vm_pager_bufferinit();
cpu_setregs();
mca_init();
}
/*

530
sys/amd64/amd64/mca.c Normal file
View File

@ -0,0 +1,530 @@
/*-
* Copyright (c) 2009 Advanced Computing Technologies LLC
* Written by: John H. Baldwin <jhb@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Support for x86 machine check architecture.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <machine/mca.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
/*
 * Kernel-private wrapper around one exported machine check record.
 * Instances are allocated by mca_record_entry() and linked onto the
 * mca_records list.
 */
struct mca_internal {
struct mca_record rec; /* Record contents copied out via hw.mca.records. */
int logged; /* Set once mca_scan_cpus() has printed this record. */
STAILQ_ENTRY(mca_internal) link; /* Linkage on the mca_records list. */
};
/* Malloc type used for the mca_internal records. */
static MALLOC_DEFINE(M_MCA, "MCA", "Machine Check Architecture");
/* The dynamically created "hw.mca" sysctl node; parent of the other OIDs. */
static struct sysctl_oid *mca_sysctl_tree;
static int mca_count; /* Number of records stored. */
/* All records captured so far, in order of capture; guarded by mca_lock. */
static STAILQ_HEAD(, mca_internal) mca_records;
/* Callout that periodically queues mca_task. */
static struct callout mca_timer;
/*
 * Scan interval.  Despite the name the unit is seconds: the value is
 * multiplied by hz wherever the callout is armed.
 */
static int mca_ticks = 3600; /* Check hourly by default. */
/* Task that runs mca_scan_cpus() on taskqueue_thread. */
static struct task mca_task;
/* Spin mutex protecting mca_records and mca_count. */
static struct mtx mca_lock;
/*
 * Sysctl handler for the scan interval.  Reads report the current value of
 * mca_ticks; writes are accepted only for strictly positive values and
 * fail with EINVAL otherwise.
 */
static int
sysctl_mca_ticks(SYSCTL_HANDLER_ARGS)
{
	int interval, rv;

	/* Work on a copy so that a rejected write leaves mca_ticks alone. */
	interval = mca_ticks;
	rv = sysctl_handle_int(oidp, &interval, 0, req);
	if (rv != 0)
		return (rv);

	/* Pure read: nothing to store. */
	if (req->newptr == NULL)
		return (0);

	if (interval < 1)
		return (EINVAL);
	mca_ticks = interval;
	return (0);
}
/*
 * Sysctl handler for the "records" node.  The single trailing MIB name is
 * the zero-based index of the record to fetch; the matching
 * 'struct mca_record' is copied out to the caller.
 *
 * Returns EINVAL when the name is not exactly one component long or the
 * index is outside 0 .. mca_count - 1, otherwise the result of SYSCTL_OUT().
 */
static int
sysctl_mca_records(SYSCTL_HANDLER_ARGS)
{
	int *name = (int *)arg1;
	u_int namelen = arg2;
	struct mca_record record;
	struct mca_internal *rec;
	int i;

	if (namelen != 1)
		return (EINVAL);

	/* Cheap unlocked pre-check; the index is re-validated under the lock. */
	if (name[0] < 0 || name[0] >= mca_count)
		return (EINVAL);

	/*
	 * The whole buffer, padding included, is copied out to userland
	 * below, so make sure no stale kernel stack bytes are left in it.
	 */
	bzero(&record, sizeof(record));

	mtx_lock_spin(&mca_lock);
	if (name[0] >= mca_count) {
		mtx_unlock_spin(&mca_lock);
		return (EINVAL);
	}
	i = 0;
	STAILQ_FOREACH(rec, &mca_records, link) {
		if (i == name[0]) {
			/*
			 * Byte copy rather than structure assignment: the
			 * stored record was allocated with M_ZERO, so this
			 * keeps any padding bytes zeroed as well.
			 */
			memcpy(&record, &rec->rec, sizeof(record));
			break;
		}
		i++;
	}
	mtx_unlock_spin(&mca_lock);

	/* Copy out only after dropping the spin lock. */
	return (SYSCTL_OUT(req, &record, sizeof(record)));
}
/*
 * Capture the machine check event, if any, that is latched in the given
 * bank.  The MSRs are read on whichever CPU executes this function.
 *
 * Returns NULL when the bank's status register does not have
 * MC_STATUS_VAL set, or when no memory is available for a record.
 * Otherwise a new record is appended to mca_records, mca_count is
 * incremented (both under mca_lock) and a pointer to the record is
 * returned.
 */
static struct mca_record *
mca_record_entry(int bank)
{
struct mca_internal *rec;
uint64_t status;
u_int p[4];
status = rdmsr(MSR_MC_STATUS(bank));
if (!(status & MC_STATUS_VAL))
return (NULL);
/* M_NOWAIT: the allocation may fail instead of sleeping. */
rec = malloc(sizeof(*rec), M_MCA, M_NOWAIT | M_ZERO);
if (rec == NULL) {
printf("MCA: Unable to allocate space for an event.\n");
return (NULL);
}
/* Save exception information. */
rec->rec.mr_status = status;
/* The address and misc registers are only read when flagged as valid. */
if (status & MC_STATUS_ADDRV)
rec->rec.mr_addr = rdmsr(MSR_MC_ADDR(bank));
if (status & MC_STATUS_MISCV)
rec->rec.mr_misc = rdmsr(MSR_MC_MISC(bank));
rec->rec.mr_tsc = rdtsc();
rec->rec.mr_apic_id = PCPU_GET(apic_id);
/*
* Clear machine check. Don't do this for uncorrectable
* errors so that the BIOS can see them.
*/
if (!(rec->rec.mr_status & (MC_STATUS_PCC | MC_STATUS_UC))) {
wrmsr(MSR_MC_STATUS(bank), 0);
/*
 * NOTE(review): the CPUID result in p[] is never used; presumably the
 * instruction is executed only as a serializing barrier after the MSR
 * write -- confirm.
 */
do_cpuid(0, p);
}
mtx_lock_spin(&mca_lock);
STAILQ_INSERT_TAIL(&mca_records, rec, link);
mca_count++;
mtx_unlock_spin(&mca_lock);
return (&rec->rec);
}
/*
 * Decode bits 3:2 of an MCA error code into the one-letter prefix used
 * by mca_log() for TLB and cache errors ("I", "D", "G", or "?" for the
 * remaining encoding).
 */
static const char *
mca_error_ttype(uint16_t mca_error)
{
	static const char *const ttypes[] = { "I", "D", "G", "?" };

	return (ttypes[(mca_error >> 2) & 0x3]);
}
/*
 * Decode bits 1:0 of an MCA error code into the level string printed by
 * mca_log(): "L0", "L1", "L2" or "LG".
 */
static const char *
mca_error_level(uint16_t mca_error)
{
	static const char *const levels[] = { "L0", "L1", "L2", "LG" };

	/* The two-bit field covers every table slot, so no fallback is needed. */
	return (levels[mca_error & 0x3]);
}
/*
 * Decode bits 7:4 of an MCA error code into the request mnemonic printed
 * by mca_log().  Encodings without a name map to "???".
 */
static const char *
mca_error_request(uint16_t mca_error)
{
	static const char *const requests[] = {
		"ERR",		/* 0x0 */
		"RD",		/* 0x1 */
		"WR",		/* 0x2 */
		"DRD",		/* 0x3 */
		"DWR",		/* 0x4 */
		"IRD",		/* 0x5 */
		"PREFETCH",	/* 0x6 */
		"EVICT",	/* 0x7 */
		"SNOOP",	/* 0x8 */
	};
	unsigned int idx;

	idx = (mca_error >> 4) & 0xf;
	if (idx >= sizeof(requests) / sizeof(requests[0]))
		return ("???");
	return (requests[idx]);
}
/*
 * Dump details about a single machine check.
 *
 * Prints one console line holding the APIC ID stored in the record, the
 * UNCOR/COR, PCC and OVER flags taken from the status word, and a decoded
 * description of the MCA error code (the status bits selected by
 * MC_STATUS_MCA_ERROR).  A second line with the recorded address follows
 * when MC_STATUS_ADDRV is set in the status.
 */
static void
mca_log(struct mca_record *rec)
{
uint16_t mca_error;
/* Note that the value printed after "CPU" is the APIC ID. */
printf("MCA: CPU %d ", rec->mr_apic_id);
if (rec->mr_status & MC_STATUS_UC)
printf("UNCOR ");
else
printf("COR ");
if (rec->mr_status & MC_STATUS_PCC)
printf("PCC ");
if (rec->mr_status & MC_STATUS_OVER)
printf("OVER ");
mca_error = rec->mr_status & MC_STATUS_MCA_ERROR;
switch (mca_error) {
/* Simple error codes. */
case 0x0000:
printf("no error");
break;
case 0x0001:
printf("unclassified error");
break;
case 0x0002:
printf("ucode ROM parity error");
break;
case 0x0003:
printf("external error");
break;
case 0x0004:
printf("FRC error");
break;
case 0x0400:
printf("internal timer error");
break;
default:
/* Remaining codes 0x0401-0x07ff: report the low ten bits. */
if ((mca_error & 0xfc00) == 0x0400) {
printf("internal error %x", mca_error & 0x03ff);
break;
}
/*
 * Compound error codes.  They are matched by bit pattern, narrowest
 * pattern first.  Every comparison below masks off bit 12 (0x1000),
 * so that bit never affects the classification.
 */
/* Memory hierarchy error. */
if ((mca_error & 0xeffc) == 0x000c) {
printf("%s memory error", mca_error_level(mca_error));
break;
}
/* TLB error. */
if ((mca_error & 0xeff0) == 0x0010) {
printf("%sTLB %s error", mca_error_ttype(mca_error),
mca_error_level(mca_error));
break;
}
/* Cache error. */
if ((mca_error & 0xef00) == 0x0100) {
printf("%sCACHE %s %s error",
mca_error_ttype(mca_error),
mca_error_level(mca_error),
mca_error_request(mca_error));
break;
}
/* Bus and/or Interconnect error. */
if ((mca_error & 0xe800) == 0x0800) {
printf("BUS%s ", mca_error_level(mca_error));
/* Bits 10:9 select the participation string. */
switch ((mca_error & 0x0600) >> 9) {
case 0:
printf("Source");
break;
case 1:
printf("Responder");
break;
case 2:
printf("Observer");
break;
default:
printf("???");
break;
}
printf(" %s ", mca_error_request(mca_error));
/* Bits 3:2 select the target string; encoding 1 has no name here. */
switch ((mca_error & 0x000c) >> 2) {
case 0:
printf("Memory");
break;
case 2:
printf("I/O");
break;
case 3:
printf("Other");
break;
default:
printf("???");
break;
}
/* Bit 8 flags a timeout. */
if (mca_error & 0x0100)
printf(" timed out");
break;
}
/* Nothing matched: print the raw code. */
printf("unknown error %x", mca_error);
break;
}
printf("\n");
if (rec->mr_status & MC_STATUS_ADDRV)
printf("MCA: Address 0x%llx\n", (long long)rec->mr_addr);
}
/*
 * This scans all the machine check banks of the current CPU to see if
 * there are any machine checks.  Every valid bank is captured via
 * mca_record_entry(); any non-recoverable errors are additionally
 * reported immediately via mca_log().  The current thread must be
 * pinned when this is called.
 *
 * The 'mcip' parameter indicates if we are being called from the MC
 * exception handler.  In that case this function returns true if the
 * system is restartable, i.e. no captured record had any of the
 * non-recoverable status bits set.  Otherwise, it returns a count of
 * the number of valid MC records found.
 */
static int
mca_scan(int mcip)
{
	struct mca_record *rec;
	uint64_t mcg_cap, ucmask;
	int count, i, recoverable;

	count = 0;
	recoverable = 1;
	ucmask = MC_STATUS_UC | MC_STATUS_PCC;

	/*
	 * When handling a MCE#, treat the OVER flag as non-restartable
	 * as well.  The flag must be added to the mask, not replace it:
	 * an uncorrected or context-corrupt error without OVER still
	 * has to be reported and must prevent a restart.
	 */
	if (mcip)
		ucmask |= MC_STATUS_OVER;

	/* The low bits of MCG_CAP give the number of banks on this CPU. */
	mcg_cap = rdmsr(MSR_MCG_CAP);
	for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
		rec = mca_record_entry(i);
		if (rec != NULL) {
			count++;
			if (rec->mr_status & ucmask) {
				recoverable = 0;
				mca_log(rec);
			}
		}
	}
	return (mcip ? recoverable : count);
}
/*
 * Scan the machine check banks on all CPUs by binding to each CPU in
 * turn. If any of the CPUs contained new machine check records, log
 * them to the console.
 *
 * This is the handler of mca_task; neither 'context' nor 'pending' is
 * used.
 */
static void
mca_scan_cpus(void *context, int pending)
{
struct mca_internal *mca;
struct thread *td;
int count, cpu;
td = curthread;
count = 0;
/*
 * sched_bind() and sched_unbind() are called with the thread lock held;
 * the lock is dropped only while the banks are being scanned.
 */
thread_lock(td);
for (cpu = 0; cpu <= mp_maxid; cpu++) {
if (CPU_ABSENT(cpu))
continue;
sched_bind(td, cpu);
thread_unlock(td);
count += mca_scan(0);
thread_lock(td);
sched_unbind(td);
}
thread_unlock(td);
if (count != 0) {
mtx_lock_spin(&mca_lock);
STAILQ_FOREACH(mca, &mca_records, link) {
if (!mca->logged) {
mca->logged = 1;
/*
 * mca_lock is released around mca_log(), presumably to keep console
 * output out of the spin-locked section.  The walk continues from
 * 'mca' afterwards; nothing in this file removes list entries.
 */
mtx_unlock_spin(&mca_lock);
mca_log(&mca->rec);
mtx_lock_spin(&mca_lock);
}
}
mtx_unlock_spin(&mca_lock);
}
}
/*
 * Callout handler: queue the scan task on taskqueue_thread and re-arm
 * the timer using the current interval.  'arg' is unused.
 */
static void
mca_periodic_scan(void *arg)
{
	int delay;

	taskqueue_enqueue(taskqueue_thread, &mca_task);

	/* mca_ticks is kept in seconds; convert to clock ticks. */
	delay = mca_ticks * hz;
	callout_reset(&mca_timer, delay, mca_periodic_scan, NULL);
}
/*
 * Sysctl handler for "force_scan".  The value always reads back as zero;
 * a successfully handled non-zero value queues an immediate scan task.
 */
static int
sysctl_mca_scan(SYSCTL_HANDLER_ARGS)
{
	int request, rv;

	request = 0;
	rv = sysctl_handle_int(oidp, &request, 0, req);
	if (rv == 0 && request != 0)
		taskqueue_enqueue(taskqueue_thread, &mca_task);
	return (rv);
}
/*
 * SYSINIT hook: arm the periodic scan timer, but only on CPUs that
 * advertise MCA.  'dummy' is unused.
 */
static void
mca_startup(void *dummy)
{
	if ((cpu_feature & CPUID_MCA) != 0)
		callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan,
		    NULL);
}
SYSINIT(mca_startup, SI_SUB_SMP, SI_ORDER_ANY, mca_startup, NULL);
/*
 * One-time initialization of the MCA bookkeeping: the record list and
 * its spin lock, the scan task, the periodic callout and the "hw.mca"
 * sysctl tree.  Called from mca_init() on the CPU whose cpuid is 0.
 */
static void
mca_setup(void)
{
mtx_init(&mca_lock, "mca", NULL, MTX_SPIN);
STAILQ_INIT(&mca_records);
TASK_INIT(&mca_task, 0x8000, mca_scan_cpus, NULL);
callout_init(&mca_timer, CALLOUT_MPSAFE);
/* hw.mca: container node for everything below. */
mca_sysctl_tree = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_hw),
OID_AUTO, "mca", CTLFLAG_RW, NULL, "MCA container");
/* hw.mca.count: read-only view of mca_count. */
SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(mca_sysctl_tree), OID_AUTO,
"count", CTLFLAG_RD, &mca_count, 0, "Record count");
/* hw.mca.interval: validated by sysctl_mca_ticks(). */
SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(mca_sysctl_tree), OID_AUTO,
"interval", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &mca_ticks,
0, sysctl_mca_ticks, "I",
"Periodic interval in seconds to scan for machine checks");
/* hw.mca.records.<index>: served by sysctl_mca_records(). */
SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(mca_sysctl_tree), OID_AUTO,
"records", CTLFLAG_RD, sysctl_mca_records, "Machine check records");
/* hw.mca.force_scan: a non-zero write queues a scan. */
SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(mca_sysctl_tree), OID_AUTO,
"force_scan", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
sysctl_mca_scan, "I", "Force an immediate scan for machine checks");
}
/*
 * Must be executed on each CPU.
 *
 * Does nothing unless the CPU advertises MCE.  With MCA present it runs
 * the one-time mca_setup() on the CPU whose cpuid is 0, then enables the
 * global and per-bank controls and clears each bank's status register.
 * Finally CR4_MCE is set in CR4.
 */
void
mca_init(void)
{
uint64_t mcg_cap;
int i;
/* MCE is required. */
if (!(cpu_feature & CPUID_MCE))
return;
if (cpu_feature & CPUID_MCA) {
if (PCPU_GET(cpuid) == 0)
mca_setup();
/* Stay on this CPU while its MSRs are being programmed. */
sched_pin();
mcg_cap = rdmsr(MSR_MCG_CAP);
/* MSR_MCG_CTL is only written when MCG_CAP reports it as present. */
if (mcg_cap & MCG_CAP_CTL_P)
/* Enable MCA features. */
wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE);
for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
/*
* Enable logging of all errors. For P6
* processors, MC0_CTL is always enabled.
*
* XXX: Better CPU test needed here?
*/
if (!(i == 0 && (cpu_id & 0xf00) == 0x600))
wrmsr(MSR_MC_CTL(i), 0xffffffffffffffffUL);
/* XXX: Better CPU test needed here. */
/*
 * Only when bits 11:8 of cpu_id are 0xf: record whatever is already
 * latched in the bank before it is wiped below.
 */
if ((cpu_id & 0xf00) == 0xf00)
mca_record_entry(i);
/* Clear all errors. */
wrmsr(MSR_MC_STATUS(i), 0);
}
sched_unpin();
}
/* Turn on delivery of machine check exceptions. */
load_cr4(rcr4() | CR4_MCE);
}
/*
 * Called when a machine check exception fires.
 *
 * Returns non-zero when execution may be resumed and zero when it may
 * not; the trap handler treats zero as fatal.
 */
int
mca_intr(void)
{
	uint64_t gstatus;
	int restartable;

	if ((cpu_feature & CPUID_MCA) == 0) {
		/*
		 * MCE without MCA: all that can be done is to print the
		 * old Pentium registers and report the event as fatal.
		 */
		printf("MC Type: 0x%lx Address: 0x%lx\n",
		    rdmsr(MSR_P5_MC_TYPE), rdmsr(MSR_P5_MC_ADDR));
		return (0);
	}

	/* Scan the banks and look for any non-recoverable errors. */
	restartable = mca_scan(1);

	/* Without RIPV set the event is never treated as restartable. */
	gstatus = rdmsr(MSR_MCG_STATUS);
	if ((gstatus & MCG_STATUS_RIPV) == 0)
		restartable = 0;

	/* Clear MCIP. */
	wrmsr(MSR_MCG_STATUS, gstatus & ~MCG_STATUS_MCIP);
	return (restartable);
}

View File

@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$");
#include <machine/clock.h>
#include <machine/cputypes.h>
#include <machine/cpufunc.h>
#include <machine/mca.h>
#include <machine/md_var.h>
#include <machine/mp_watchdog.h>
#include <machine/pcb.h>
@ -667,6 +668,8 @@ init_secondary(void)
KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
PCPU_SET(curthread, PCPU_GET(idlethread));
mca_init();
mtx_lock_spin(&ap_boot_mtx);
/* Init local apic for irq's */

View File

@ -88,6 +88,7 @@ __FBSDID("$FreeBSD$");
#include <machine/cpu.h>
#include <machine/intr_machdep.h>
#include <machine/mca.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#ifdef SMP
@ -266,6 +267,12 @@ trap(struct trapframe *frame)
goto out;
#endif
if (type == T_MCHK) {
if (!mca_intr())
trap_fatal(frame, 0);
goto out;
}
#ifdef KDTRACE_HOOKS
/*
* A trap can occur while DTrace executes a probe. Before

48
sys/amd64/include/mca.h Normal file
View File

@ -0,0 +1,48 @@
/*-
* Copyright (c) 2009 Advanced Computing Technologies LLC
* Written by: John H. Baldwin <jhb@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef __MACHINE_MCA_H__
#define __MACHINE_MCA_H__
/*
 * One captured machine check event.  This is the structure copied out
 * to userland by the hw.mca.records sysctl.
 */
struct mca_record {
uint64_t mr_status; /* Value read from the bank's status MSR. */
uint64_t mr_addr; /* Bank's address MSR; only read if ADDRV was set. */
uint64_t mr_misc; /* Bank's misc MSR; only read if MISCV was set. */
uint64_t mr_tsc; /* TSC value at the time the record was captured. */
int mr_apic_id; /* APIC ID of the CPU that captured the record. */
};
#ifdef _KERNEL
/* Per-CPU setup of machine check reporting; run on every CPU. */
void mca_init(void);
/* Machine check trap handler; returns non-zero if execution may resume. */
int mca_intr(void);
#endif
#endif /* !__MACHINE_MCA_H__ */

View File

@ -345,6 +345,34 @@
#define DIR0 0xfe
#define DIR1 0xff
/*
* Machine Check register constants.
*/
/* Fields of the value read from MSR_MCG_CAP. */
#define MCG_CAP_COUNT 0x000000ff
#define MCG_CAP_CTL_P 0x00000100
#define MCG_CAP_EXT_P 0x00000200
#define MCG_CAP_TES_P 0x00000800
#define MCG_CAP_EXT_CNT 0x00ff0000
/* Bits of MSR_MCG_STATUS. */
#define MCG_STATUS_RIPV 0x00000001
#define MCG_STATUS_EIPV 0x00000002
#define MCG_STATUS_MCIP 0x00000004
/* Values written to MSR_MCG_CTL. */
#define MCG_CTL_ENABLE 0xffffffffffffffffUL
#define MCG_CTL_DISABLE 0x0000000000000000UL
/* Per-bank MSR numbers: bank x lives 4 * x past the bank 0 register. */
#define MSR_MC_CTL(x) (MSR_MC0_CTL + (x) * 4)
#define MSR_MC_STATUS(x) (MSR_MC0_STATUS + (x) * 4)
#define MSR_MC_ADDR(x) (MSR_MC0_ADDR + (x) * 4)
#define MSR_MC_MISC(x) (MSR_MC0_MISC + (x) * 4)
/* Fields and flags of a per-bank status register value. */
#define MC_STATUS_MCA_ERROR 0x000000000000ffffUL
#define MC_STATUS_MODEL_ERROR 0x00000000ffff0000UL
#define MC_STATUS_OTHER_INFO 0x01ffffff00000000UL
#define MC_STATUS_PCC 0x0200000000000000UL
#define MC_STATUS_ADDRV 0x0400000000000000UL
#define MC_STATUS_MISCV 0x0800000000000000UL
#define MC_STATUS_EN 0x1000000000000000UL
#define MC_STATUS_UC 0x2000000000000000UL
#define MC_STATUS_OVER 0x4000000000000000UL
#define MC_STATUS_VAL 0x8000000000000000UL
/*
* The following four 3-byte registers control the non-cacheable regions.
* These registers must be written as three separate bytes.

View File

@ -110,6 +110,7 @@ amd64/amd64/legacy.c standard
amd64/amd64/local_apic.c standard
amd64/amd64/locore.S standard no-obj
amd64/amd64/machdep.c standard
amd64/amd64/mca.c standard
amd64/amd64/mem.c optional mem
amd64/amd64/minidump_machdep.c standard
amd64/amd64/mp_machdep.c optional smp

View File

@ -278,6 +278,7 @@ i386/xen/locore.s optional xen no-obj
i386/i386/longrun.c optional cpu_enable_longrun
i386/i386/machdep.c standard
i386/xen/xen_machdep.c optional xen
i386/i386/mca.c standard
i386/i386/mem.c optional mem
i386/i386/minidump_machdep.c standard
i386/i386/mp_clock.c optional smp

View File

@ -113,6 +113,7 @@ __FBSDID("$FreeBSD$");
#include <machine/cpu.h>
#include <machine/cputypes.h>
#include <machine/intr_machdep.h>
#include <machine/mca.h>
#include <machine/md_var.h>
#include <machine/metadata.h>
#include <machine/pc/bios.h>
@ -320,6 +321,7 @@ cpu_startup(dummy)
#ifndef XEN
cpu_setregs();
#endif
mca_init();
}
/*

530
sys/i386/i386/mca.c Normal file
View File

@ -0,0 +1,530 @@
/*-
* Copyright (c) 2009 Advanced Computing Technologies LLC
* Written by: John H. Baldwin <jhb@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Support for x86 machine check architecture.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <machine/mca.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
/*
 * Kernel-private wrapper around one exported machine check record.
 * Instances are allocated by mca_record_entry() and linked onto the
 * mca_records list.
 */
struct mca_internal {
struct mca_record rec; /* Record contents copied out via hw.mca.records. */
int logged; /* Set once mca_scan_cpus() has printed this record. */
STAILQ_ENTRY(mca_internal) link; /* Linkage on the mca_records list. */
};
/* Malloc type used for the mca_internal records. */
static MALLOC_DEFINE(M_MCA, "MCA", "Machine Check Architecture");
/* The dynamically created "hw.mca" sysctl node; parent of the other OIDs. */
static struct sysctl_oid *mca_sysctl_tree;
static int mca_count; /* Number of records stored. */
/* All records captured so far, in order of capture; guarded by mca_lock. */
static STAILQ_HEAD(, mca_internal) mca_records;
/* Callout that periodically queues mca_task. */
static struct callout mca_timer;
/*
 * Scan interval.  Despite the name the unit is seconds: the value is
 * multiplied by hz wherever the callout is armed.
 */
static int mca_ticks = 3600; /* Check hourly by default. */
/* Task that runs mca_scan_cpus() on taskqueue_thread. */
static struct task mca_task;
/* Spin mutex protecting mca_records and mca_count. */
static struct mtx mca_lock;
/*
 * Sysctl handler for the scan interval.  Reads report the current value of
 * mca_ticks; writes are accepted only for strictly positive values and
 * fail with EINVAL otherwise.
 */
static int
sysctl_mca_ticks(SYSCTL_HANDLER_ARGS)
{
	int interval, rv;

	/* Work on a copy so that a rejected write leaves mca_ticks alone. */
	interval = mca_ticks;
	rv = sysctl_handle_int(oidp, &interval, 0, req);
	if (rv != 0)
		return (rv);

	/* Pure read: nothing to store. */
	if (req->newptr == NULL)
		return (0);

	if (interval < 1)
		return (EINVAL);
	mca_ticks = interval;
	return (0);
}
/*
 * Sysctl handler for the "records" node.  The single trailing MIB name is
 * the zero-based index of the record to fetch; the matching
 * 'struct mca_record' is copied out to the caller.
 *
 * Returns EINVAL when the name is not exactly one component long or the
 * index is outside 0 .. mca_count - 1, otherwise the result of SYSCTL_OUT().
 */
static int
sysctl_mca_records(SYSCTL_HANDLER_ARGS)
{
	int *name = (int *)arg1;
	u_int namelen = arg2;
	struct mca_record record;
	struct mca_internal *rec;
	int i;

	if (namelen != 1)
		return (EINVAL);

	/* Cheap unlocked pre-check; the index is re-validated under the lock. */
	if (name[0] < 0 || name[0] >= mca_count)
		return (EINVAL);

	/*
	 * The whole buffer is copied out to userland below, so make sure
	 * it never holds stale kernel stack bytes, not even if the list
	 * walk were to come up empty.
	 */
	bzero(&record, sizeof(record));

	mtx_lock_spin(&mca_lock);
	if (name[0] >= mca_count) {
		mtx_unlock_spin(&mca_lock);
		return (EINVAL);
	}
	i = 0;
	STAILQ_FOREACH(rec, &mca_records, link) {
		if (i == name[0]) {
			/*
			 * Byte copy rather than structure assignment: the
			 * stored record was allocated with M_ZERO, so this
			 * keeps any padding bytes zeroed as well.
			 */
			memcpy(&record, &rec->rec, sizeof(record));
			break;
		}
		i++;
	}
	mtx_unlock_spin(&mca_lock);

	/* Copy out only after dropping the spin lock. */
	return (SYSCTL_OUT(req, &record, sizeof(record)));
}
/*
 * Capture the machine check event, if any, that is latched in the given
 * bank.  The MSRs are read on whichever CPU executes this function.
 *
 * Returns NULL when the bank's status register does not have
 * MC_STATUS_VAL set, or when no memory is available for a record.
 * Otherwise a new record is appended to mca_records, mca_count is
 * incremented (both under mca_lock) and a pointer to the record is
 * returned.
 */
static struct mca_record *
mca_record_entry(int bank)
{
struct mca_internal *rec;
uint64_t status;
u_int p[4];
status = rdmsr(MSR_MC_STATUS(bank));
if (!(status & MC_STATUS_VAL))
return (NULL);
/* M_NOWAIT: the allocation may fail instead of sleeping. */
rec = malloc(sizeof(*rec), M_MCA, M_NOWAIT | M_ZERO);
if (rec == NULL) {
printf("MCA: Unable to allocate space for an event.\n");
return (NULL);
}
/* Save exception information. */
rec->rec.mr_status = status;
/* The address and misc registers are only read when flagged as valid. */
if (status & MC_STATUS_ADDRV)
rec->rec.mr_addr = rdmsr(MSR_MC_ADDR(bank));
if (status & MC_STATUS_MISCV)
rec->rec.mr_misc = rdmsr(MSR_MC_MISC(bank));
rec->rec.mr_tsc = rdtsc();
rec->rec.mr_apic_id = PCPU_GET(apic_id);
/*
* Clear machine check. Don't do this for uncorrectable
* errors so that the BIOS can see them.
*/
if (!(rec->rec.mr_status & (MC_STATUS_PCC | MC_STATUS_UC))) {
wrmsr(MSR_MC_STATUS(bank), 0);
/*
 * NOTE(review): the CPUID result in p[] is never used; presumably the
 * instruction is executed only as a serializing barrier after the MSR
 * write -- confirm.
 */
do_cpuid(0, p);
}
mtx_lock_spin(&mca_lock);
STAILQ_INSERT_TAIL(&mca_records, rec, link);
mca_count++;
mtx_unlock_spin(&mca_lock);
return (&rec->rec);
}
/*
 * Decode bits 3:2 of an MCA error code into the one-letter prefix used
 * by mca_log() for TLB and cache errors ("I", "D", "G", or "?" for the
 * remaining encoding).
 */
static const char *
mca_error_ttype(uint16_t mca_error)
{
	static const char *const ttypes[] = { "I", "D", "G", "?" };

	return (ttypes[(mca_error >> 2) & 0x3]);
}
/*
 * Decode bits 1:0 of an MCA error code into the level string printed by
 * mca_log(): "L0", "L1", "L2" or "LG".
 */
static const char *
mca_error_level(uint16_t mca_error)
{
	static const char *const levels[] = { "L0", "L1", "L2", "LG" };

	/* The two-bit field covers every table slot, so no fallback is needed. */
	return (levels[mca_error & 0x3]);
}
/*
 * Decode bits 7:4 of an MCA error code into the request mnemonic printed
 * by mca_log().  Encodings without a name map to "???".
 */
static const char *
mca_error_request(uint16_t mca_error)
{
	static const char *const requests[] = {
		"ERR",		/* 0x0 */
		"RD",		/* 0x1 */
		"WR",		/* 0x2 */
		"DRD",		/* 0x3 */
		"DWR",		/* 0x4 */
		"IRD",		/* 0x5 */
		"PREFETCH",	/* 0x6 */
		"EVICT",	/* 0x7 */
		"SNOOP",	/* 0x8 */
	};
	unsigned int idx;

	idx = (mca_error >> 4) & 0xf;
	if (idx >= sizeof(requests) / sizeof(requests[0]))
		return ("???");
	return (requests[idx]);
}
/*
 * Dump details about a single machine check.
 *
 * Prints one console line holding the APIC ID stored in the record, the
 * UNCOR/COR, PCC and OVER flags taken from the status word, and a decoded
 * description of the MCA error code (the status bits selected by
 * MC_STATUS_MCA_ERROR).  A second line with the recorded address follows
 * when MC_STATUS_ADDRV is set in the status.
 */
static void
mca_log(struct mca_record *rec)
{
uint16_t mca_error;
/* Note that the value printed after "CPU" is the APIC ID. */
printf("MCA: CPU %d ", rec->mr_apic_id);
if (rec->mr_status & MC_STATUS_UC)
printf("UNCOR ");
else
printf("COR ");
if (rec->mr_status & MC_STATUS_PCC)
printf("PCC ");
if (rec->mr_status & MC_STATUS_OVER)
printf("OVER ");
mca_error = rec->mr_status & MC_STATUS_MCA_ERROR;
switch (mca_error) {
/* Simple error codes. */
case 0x0000:
printf("no error");
break;
case 0x0001:
printf("unclassified error");
break;
case 0x0002:
printf("ucode ROM parity error");
break;
case 0x0003:
printf("external error");
break;
case 0x0004:
printf("FRC error");
break;
case 0x0400:
printf("internal timer error");
break;
default:
/* Remaining codes 0x0401-0x07ff: report the low ten bits. */
if ((mca_error & 0xfc00) == 0x0400) {
printf("internal error %x", mca_error & 0x03ff);
break;
}
/*
 * Compound error codes.  They are matched by bit pattern, narrowest
 * pattern first.  Every comparison below masks off bit 12 (0x1000),
 * so that bit never affects the classification.
 */
/* Memory hierarchy error. */
if ((mca_error & 0xeffc) == 0x000c) {
printf("%s memory error", mca_error_level(mca_error));
break;
}
/* TLB error. */
if ((mca_error & 0xeff0) == 0x0010) {
printf("%sTLB %s error", mca_error_ttype(mca_error),
mca_error_level(mca_error));
break;
}
/* Cache error. */
if ((mca_error & 0xef00) == 0x0100) {
printf("%sCACHE %s %s error",
mca_error_ttype(mca_error),
mca_error_level(mca_error),
mca_error_request(mca_error));
break;
}
/* Bus and/or Interconnect error. */
if ((mca_error & 0xe800) == 0x0800) {
printf("BUS%s ", mca_error_level(mca_error));
/* Bits 10:9 select the participation string. */
switch ((mca_error & 0x0600) >> 9) {
case 0:
printf("Source");
break;
case 1:
printf("Responder");
break;
case 2:
printf("Observer");
break;
default:
printf("???");
break;
}
printf(" %s ", mca_error_request(mca_error));
/* Bits 3:2 select the target string; encoding 1 has no name here. */
switch ((mca_error & 0x000c) >> 2) {
case 0:
printf("Memory");
break;
case 2:
printf("I/O");
break;
case 3:
printf("Other");
break;
default:
printf("???");
break;
}
/* Bit 8 flags a timeout. */
if (mca_error & 0x0100)
printf(" timed out");
break;
}
/* Nothing matched: print the raw code. */
printf("unknown error %x", mca_error);
break;
}
printf("\n");
if (rec->mr_status & MC_STATUS_ADDRV)
printf("MCA: Address 0x%llx\n", (long long)rec->mr_addr);
}
/*
 * This scans all the machine check banks of the current CPU to see if
 * there are any machine checks.  Every valid bank is captured via
 * mca_record_entry(); any non-recoverable errors are additionally
 * reported immediately via mca_log().  The current thread must be
 * pinned when this is called.
 *
 * The 'mcip' parameter indicates if we are being called from the MC
 * exception handler.  In that case this function returns true if the
 * system is restartable, i.e. no captured record had any of the
 * non-recoverable status bits set.  Otherwise, it returns a count of
 * the number of valid MC records found.
 */
static int
mca_scan(int mcip)
{
	struct mca_record *rec;
	uint64_t mcg_cap, ucmask;
	int count, i, recoverable;

	count = 0;
	recoverable = 1;
	ucmask = MC_STATUS_UC | MC_STATUS_PCC;

	/*
	 * When handling a MCE#, treat the OVER flag as non-restartable
	 * as well.  The flag must be added to the mask, not replace it:
	 * an uncorrected or context-corrupt error without OVER still
	 * has to be reported and must prevent a restart.
	 */
	if (mcip)
		ucmask |= MC_STATUS_OVER;

	/* The low bits of MCG_CAP give the number of banks on this CPU. */
	mcg_cap = rdmsr(MSR_MCG_CAP);
	for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
		rec = mca_record_entry(i);
		if (rec != NULL) {
			count++;
			if (rec->mr_status & ucmask) {
				recoverable = 0;
				mca_log(rec);
			}
		}
	}
	return (mcip ? recoverable : count);
}
/*
 * Scan the machine check banks on all CPUs by binding to each CPU in
 * turn. If any of the CPUs contained new machine check records, log
 * them to the console.
 *
 * This is the handler of mca_task; neither 'context' nor 'pending' is
 * used.
 */
static void
mca_scan_cpus(void *context, int pending)
{
struct mca_internal *mca;
struct thread *td;
int count, cpu;
td = curthread;
count = 0;
/*
 * sched_bind() and sched_unbind() are called with the thread lock held;
 * the lock is dropped only while the banks are being scanned.
 */
thread_lock(td);
for (cpu = 0; cpu <= mp_maxid; cpu++) {
if (CPU_ABSENT(cpu))
continue;
sched_bind(td, cpu);
thread_unlock(td);
count += mca_scan(0);
thread_lock(td);
sched_unbind(td);
}
thread_unlock(td);
if (count != 0) {
mtx_lock_spin(&mca_lock);
STAILQ_FOREACH(mca, &mca_records, link) {
if (!mca->logged) {
mca->logged = 1;
/*
 * mca_lock is released around mca_log(), presumably to keep console
 * output out of the spin-locked section.  The walk continues from
 * 'mca' afterwards; nothing in this file removes list entries.
 */
mtx_unlock_spin(&mca_lock);
mca_log(&mca->rec);
mtx_lock_spin(&mca_lock);
}
}
mtx_unlock_spin(&mca_lock);
}
}
/*
 * Callout handler: queue the scan task on taskqueue_thread and re-arm
 * the timer using the current interval.  'arg' is unused.
 */
static void
mca_periodic_scan(void *arg)
{
	int delay;

	taskqueue_enqueue(taskqueue_thread, &mca_task);

	/* mca_ticks is kept in seconds; convert to clock ticks. */
	delay = mca_ticks * hz;
	callout_reset(&mca_timer, delay, mca_periodic_scan, NULL);
}
/*
 * Sysctl handler for "force_scan".  The value always reads back as zero;
 * a successfully handled non-zero value queues an immediate scan task.
 */
static int
sysctl_mca_scan(SYSCTL_HANDLER_ARGS)
{
	int request, rv;

	request = 0;
	rv = sysctl_handle_int(oidp, &request, 0, req);
	if (rv == 0 && request != 0)
		taskqueue_enqueue(taskqueue_thread, &mca_task);
	return (rv);
}
/*
 * SYSINIT hook: arm the periodic scan timer, but only on CPUs that
 * advertise MCA.  'dummy' is unused.
 */
static void
mca_startup(void *dummy)
{
	if ((cpu_feature & CPUID_MCA) != 0)
		callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan,
		    NULL);
}
SYSINIT(mca_startup, SI_SUB_SMP, SI_ORDER_ANY, mca_startup, NULL);
/*
 * One-time initialization of the MCA bookkeeping: the record list and
 * its spin lock, the scan task, the periodic callout and the "hw.mca"
 * sysctl tree.  Called from mca_init() on the CPU whose cpuid is 0.
 */
static void
mca_setup(void)
{
mtx_init(&mca_lock, "mca", NULL, MTX_SPIN);
STAILQ_INIT(&mca_records);
TASK_INIT(&mca_task, 0x8000, mca_scan_cpus, NULL);
callout_init(&mca_timer, CALLOUT_MPSAFE);
/* hw.mca: container node for everything below. */
mca_sysctl_tree = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_hw),
OID_AUTO, "mca", CTLFLAG_RW, NULL, "MCA container");
/* hw.mca.count: read-only view of mca_count. */
SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(mca_sysctl_tree), OID_AUTO,
"count", CTLFLAG_RD, &mca_count, 0, "Record count");
/* hw.mca.interval: validated by sysctl_mca_ticks(). */
SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(mca_sysctl_tree), OID_AUTO,
"interval", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &mca_ticks,
0, sysctl_mca_ticks, "I",
"Periodic interval in seconds to scan for machine checks");
/* hw.mca.records.<index>: served by sysctl_mca_records(). */
SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(mca_sysctl_tree), OID_AUTO,
"records", CTLFLAG_RD, sysctl_mca_records, "Machine check records");
/* hw.mca.force_scan: a non-zero write queues a scan. */
SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(mca_sysctl_tree), OID_AUTO,
"force_scan", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
sysctl_mca_scan, "I", "Force an immediate scan for machine checks");
}
/*
 * Must be executed on each CPU.
 *
 * Does nothing unless the CPU advertises MCE.  With MCA present it runs
 * the one-time mca_setup() on the CPU whose cpuid is 0, then enables the
 * global and per-bank controls and clears each bank's status register.
 * Finally CR4_MCE is set in CR4.
 */
void
mca_init(void)
{
uint64_t mcg_cap;
int i;
/* MCE is required. */
if (!(cpu_feature & CPUID_MCE))
return;
if (cpu_feature & CPUID_MCA) {
if (PCPU_GET(cpuid) == 0)
mca_setup();
/* Stay on this CPU while its MSRs are being programmed. */
sched_pin();
mcg_cap = rdmsr(MSR_MCG_CAP);
/* MSR_MCG_CTL is only written when MCG_CAP reports it as present. */
if (mcg_cap & MCG_CAP_CTL_P)
/* Enable MCA features. */
wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE);
for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
/*
* Enable logging of all errors. For P6
* processors, MC0_CTL is always enabled.
*
* XXX: Better CPU test needed here?
*/
if (!(i == 0 && (cpu_id & 0xf00) == 0x600))
wrmsr(MSR_MC_CTL(i), 0xffffffffffffffffUL);
/* XXX: Better CPU test needed here. */
/*
 * Only when bits 11:8 of cpu_id are 0xf: record whatever is already
 * latched in the bank before it is wiped below.
 */
if ((cpu_id & 0xf00) == 0xf00)
mca_record_entry(i);
/* Clear all errors. */
wrmsr(MSR_MC_STATUS(i), 0);
}
sched_unpin();
}
/* Turn on delivery of machine check exceptions. */
load_cr4(rcr4() | CR4_MCE);
}
/*
 * Called when a machine check exception fires.
 *
 * Returns non-zero when execution may be resumed and zero when it may
 * not; the trap handler treats zero as fatal.
 */
int
mca_intr(void)
{
	uint64_t gstatus;
	int restartable;

	if ((cpu_feature & CPUID_MCA) == 0) {
		/*
		 * MCE without MCA: all that can be done is to print the
		 * old Pentium registers and report the event as fatal.
		 */
		printf("MC Type: 0x%llx Address: 0x%llx\n",
		    rdmsr(MSR_P5_MC_TYPE), rdmsr(MSR_P5_MC_ADDR));
		return (0);
	}

	/* Scan the banks and look for any non-recoverable errors. */
	restartable = mca_scan(1);

	/* Without RIPV set the event is never treated as restartable. */
	gstatus = rdmsr(MSR_MCG_STATUS);
	if ((gstatus & MCG_STATUS_RIPV) == 0)
		restartable = 0;

	/* Clear MCIP. */
	wrmsr(MSR_MCG_STATUS, gstatus & ~MCG_STATUS_MCIP);
	return (restartable);
}

View File

@ -74,6 +74,7 @@ __FBSDID("$FreeBSD$");
#include <machine/apicreg.h>
#include <machine/clock.h>
#include <machine/cputypes.h>
#include <machine/mca.h>
#include <machine/md_var.h>
#include <machine/mp_watchdog.h>
#include <machine/pcb.h>
@ -704,6 +705,8 @@ init_secondary(void)
KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
PCPU_SET(curthread, PCPU_GET(idlethread));
mca_init();
mtx_lock_spin(&ap_boot_mtx);
/* Init local apic for irq's */

View File

@ -90,6 +90,7 @@ __FBSDID("$FreeBSD$");
#include <machine/cpu.h>
#include <machine/intr_machdep.h>
#include <machine/mca.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#ifdef SMP
@ -239,6 +240,12 @@ trap(struct trapframe *frame)
goto out;
#endif
if (type == T_MCHK) {
if (!mca_intr())
trap_fatal(frame, 0);
goto out;
}
#ifdef KDTRACE_HOOKS
/*
* A trap can occur while DTrace executes a probe. Before

48
sys/i386/include/mca.h Normal file
View File

@ -0,0 +1,48 @@
/*-
* Copyright (c) 2009 Advanced Computing Technologies LLC
* Written by: John H. Baldwin <jhb@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef __MACHINE_MCA_H__
#define __MACHINE_MCA_H__
/*
 * One captured machine check event.  This is the structure copied out
 * to userland by the hw.mca.records sysctl.
 */
struct mca_record {
uint64_t mr_status; /* Value read from the bank's status MSR. */
uint64_t mr_addr; /* Bank's address MSR; only read if ADDRV was set. */
uint64_t mr_misc; /* Bank's misc MSR; only read if MISCV was set. */
uint64_t mr_tsc; /* TSC value at the time the record was captured. */
int mr_apic_id; /* APIC ID of the CPU that captured the record. */
};
#ifdef _KERNEL
/* Per-CPU setup of machine check reporting; run on every CPU. */
void mca_init(void);
/* Machine check trap handler; returns non-zero if execution may resume. */
int mca_intr(void);
#endif
#endif /* !__MACHINE_MCA_H__ */

View File

@ -414,6 +414,34 @@
#define DIR0 0xfe
#define DIR1 0xff
/*
* Machine Check register constants.
*/
/* Fields of the value read from MSR_MCG_CAP. */
#define MCG_CAP_COUNT 0x000000ff
#define MCG_CAP_CTL_P 0x00000100
#define MCG_CAP_EXT_P 0x00000200
#define MCG_CAP_TES_P 0x00000800
#define MCG_CAP_EXT_CNT 0x00ff0000
/* Bits of MSR_MCG_STATUS. */
#define MCG_STATUS_RIPV 0x00000001
#define MCG_STATUS_EIPV 0x00000002
#define MCG_STATUS_MCIP 0x00000004
/* Values written to MSR_MCG_CTL. */
#define MCG_CTL_ENABLE 0xffffffffffffffffUL
#define MCG_CTL_DISABLE 0x0000000000000000UL
/* Per-bank MSR numbers: bank x lives 4 * x past the bank 0 register. */
#define MSR_MC_CTL(x) (MSR_MC0_CTL + (x) * 4)
#define MSR_MC_STATUS(x) (MSR_MC0_STATUS + (x) * 4)
#define MSR_MC_ADDR(x) (MSR_MC0_ADDR + (x) * 4)
#define MSR_MC_MISC(x) (MSR_MC0_MISC + (x) * 4)
/* Fields and flags of a per-bank status register value. */
#define MC_STATUS_MCA_ERROR 0x000000000000ffffUL
#define MC_STATUS_MODEL_ERROR 0x00000000ffff0000UL
#define MC_STATUS_OTHER_INFO 0x01ffffff00000000UL
#define MC_STATUS_PCC 0x0200000000000000UL
#define MC_STATUS_ADDRV 0x0400000000000000UL
#define MC_STATUS_MISCV 0x0800000000000000UL
#define MC_STATUS_EN 0x1000000000000000UL
#define MC_STATUS_UC 0x2000000000000000UL
#define MC_STATUS_OVER 0x4000000000000000UL
#define MC_STATUS_VAL 0x8000000000000000UL
/*
* The following four 3-byte registers control the non-cacheable regions.
* These registers must be written as three separate bytes.