- Extend the machine check record structure to include several fields useful
  for parsing model-specific and other fields in machine check events
  including the global machine check capabilities and status registers,
  CPU identification, and the FreeBSD CPU ID.
- Report these added fields in the console log of a machine check so that
  a record structure can be reconstituted from the console messages.
- Parse new architectural errors including memory controller errors.

MFC after:	1 week
This commit is contained in:
John Baldwin 2010-03-16 16:01:19 +00:00
parent c998036d71
commit a311ca2f45
6 changed files with 132 additions and 6 deletions

View File

@ -186,19 +186,46 @@ mca_error_request(uint16_t mca_error)
return ("???");
}
/*
 * Decode bits 6:4 of an MCA error code into a short mnemonic string.
 * Values that have no known mnemonic yield "???".
 */
static const char *
mca_error_mmtype(uint16_t mca_error)
{
	/* Indexed by the 3-bit field value; entries past the end are unknown. */
	static const char *const mmtype_names[] = {
		"GEN",
		"RD",
		"WR",
		"AC",
		"MS"
	};
	unsigned int mmtype;

	mmtype = (mca_error >> 4) & 0x7;
	if (mmtype >= sizeof(mmtype_names) / sizeof(mmtype_names[0]))
		return ("???");
	return (mmtype_names[mmtype]);
}
/* Dump details about a single machine check. */
static void __nonnull(1)
mca_log(const struct mca_record *rec)
{
uint16_t mca_error;
printf("MCA: bank %d, status 0x%016llx\n", rec->mr_bank,
printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank,
(long long)rec->mr_status);
printf("MCA: CPU %d ", rec->mr_apic_id);
printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n",
(long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status);
printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor,
rec->mr_cpu_id, rec->mr_apic_id);
printf("MCA: CPU %d ", rec->mr_cpu);
if (rec->mr_status & MC_STATUS_UC)
printf("UNCOR ");
else
else {
printf("COR ");
if (rec->mr_mcg_cap & MCG_CAP_TES_P)
printf("(%lld) ", ((long long)rec->mr_status &
MC_STATUS_COR_COUNT) >> 38);
}
if (rec->mr_status & MC_STATUS_PCC)
printf("PCC ");
if (rec->mr_status & MC_STATUS_OVER)
@ -221,6 +248,9 @@ mca_log(const struct mca_record *rec)
case 0x0004:
printf("FRC error");
break;
case 0x0005:
printf("internal parity error");
break;
case 0x0400:
printf("internal timer error");
break;
@ -245,6 +275,17 @@ mca_log(const struct mca_record *rec)
break;
}
/* Memory controller error. */
if ((mca_error & 0xef80) == 0x0080) {
printf("%s channel ", mca_error_mmtype(mca_error));
if ((mca_error & 0x000f) != 0x000f)
printf("%d", mca_error & 0x000f);
else
printf("??");
printf(" memory error");
break;
}
/* Cache error. */
if ((mca_error & 0xef00) == 0x0100) {
printf("%sCACHE %s %s error",
@ -322,6 +363,11 @@ mca_check_status(int bank, struct mca_record *rec)
rec->mr_misc = rdmsr(MSR_MC_MISC(bank));
rec->mr_tsc = rdtsc();
rec->mr_apic_id = PCPU_GET(apic_id);
rec->mr_mcg_cap = rdmsr(MSR_MCG_CAP);
rec->mr_mcg_status = rdmsr(MSR_MCG_STATUS);
rec->mr_cpu_id = cpu_id;
rec->mr_cpu_vendor_id = cpu_vendor_id;
rec->mr_cpu = PCPU_GET(cpuid);
/*
* Clear machine check. Don't do this for uncorrectable

View File

@ -37,6 +37,11 @@ struct mca_record {
uint64_t mr_tsc;
int mr_apic_id;
int mr_bank;
uint64_t mr_mcg_cap;
uint64_t mr_mcg_status;
int mr_cpu_id;
int mr_cpu_vendor_id;
int mr_cpu;
};
#ifdef _KERNEL

View File

@ -267,6 +267,7 @@
#define MSR_MTRR16kBase 0x258
#define MSR_MTRR4kBase 0x268
#define MSR_PAT 0x277
#define MSR_MC0_CTL2 0x280
#define MSR_MTRRdefType 0x2ff
#define MSR_MC0_CTL 0x400
#define MSR_MC0_STATUS 0x401
@ -352,8 +353,10 @@
#define MCG_CAP_COUNT 0x000000ff
#define MCG_CAP_CTL_P 0x00000100
#define MCG_CAP_EXT_P 0x00000200
#define MCG_CAP_CMCI_P 0x00000400
#define MCG_CAP_TES_P 0x00000800
#define MCG_CAP_EXT_CNT 0x00ff0000
#define MCG_CAP_SER_P 0x01000000
#define MCG_STATUS_RIPV 0x00000001
#define MCG_STATUS_EIPV 0x00000002
#define MCG_STATUS_MCIP 0x00000004
@ -363,9 +366,14 @@
#define MSR_MC_STATUS(x) (MSR_MC0_STATUS + (x) * 4)
#define MSR_MC_ADDR(x) (MSR_MC0_ADDR + (x) * 4)
#define MSR_MC_MISC(x) (MSR_MC0_MISC + (x) * 4)
#define MSR_MC_CTL2(x) (MSR_MC0_CTL2 + (x)) /* If MCG_CAP_CMCI_P */
/*
 * Bit fields of a per-bank machine check status (MSR_MC_STATUS) value.
 * NOTE(review): the Intel SDM appears to gate the corrected error count
 * (bits 52:38) on MCG_CAP_CMCI_P and the AR/S bits (55/56) on MCG_CAP_SER_P,
 * which differs from the capabilities named in the comments below --
 * confirm against the SDM before relying on them.
 */
#define MC_STATUS_MCA_ERROR 0x000000000000ffffUL
#define MC_STATUS_MODEL_ERROR 0x00000000ffff0000UL
#define MC_STATUS_OTHER_INFO 0x01ffffff00000000UL
#define MC_STATUS_COR_COUNT 0x001fffc000000000UL /* If MCG_CAP_TES_P */
#define MC_STATUS_TES_STATUS 0x0060000000000000UL /* If MCG_CAP_TES_P */
#define MC_STATUS_AR 0x0080000000000000UL /* If MCG_CAP_CMCI_P */
#define MC_STATUS_S 0x0100000000000000UL /* If MCG_CAP_CMCI_P */
#define MC_STATUS_PCC 0x0200000000000000UL
#define MC_STATUS_ADDRV 0x0400000000000000UL
#define MC_STATUS_MISCV 0x0800000000000000UL
@ -373,6 +381,10 @@
#define MC_STATUS_UC 0x2000000000000000UL
#define MC_STATUS_OVER 0x4000000000000000UL
#define MC_STATUS_VAL 0x8000000000000000UL
#define MC_MISC_RA_LSB 0x000000000000003fUL /* If MCG_CAP_SER_P */
#define MC_MISC_ADDRESS_MODE 0x00000000000001c0UL /* If MCG_CAP_SER_P */
#define MC_CTL2_THRESHOLD 0x0000000000003fffUL
#define MC_CTL2_CMCI_EN 0x0000000040000000UL
/*
* The following four 3-byte registers control the non-cacheable regions.

View File

@ -177,19 +177,46 @@ mca_error_request(uint16_t mca_error)
return ("???");
}
/*
 * Decode bits 6:4 of an MCA error code into a short mnemonic string.
 * Values that have no known mnemonic yield "???".
 */
static const char *
mca_error_mmtype(uint16_t mca_error)
{
	/* Indexed by the 3-bit field value; entries past the end are unknown. */
	static const char *const mmtype_names[] = {
		"GEN",
		"RD",
		"WR",
		"AC",
		"MS"
	};
	unsigned int mmtype;

	mmtype = (mca_error >> 4) & 0x7;
	if (mmtype >= sizeof(mmtype_names) / sizeof(mmtype_names[0]))
		return ("???");
	return (mmtype_names[mmtype]);
}
/* Dump details about a single machine check. */
static void __nonnull(1)
mca_log(const struct mca_record *rec)
{
uint16_t mca_error;
printf("MCA: bank %d, status 0x%016llx\n", rec->mr_bank,
printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank,
(long long)rec->mr_status);
printf("MCA: CPU %d ", rec->mr_apic_id);
printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n",
(long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status);
printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor,
rec->mr_cpu_id, rec->mr_apic_id);
printf("MCA: CPU %d ", rec->mr_cpu);
if (rec->mr_status & MC_STATUS_UC)
printf("UNCOR ");
else
else {
printf("COR ");
if (rec->mr_mcg_cap & MCG_CAP_TES_P)
printf("(%lld) ", ((long long)rec->mr_status &
MC_STATUS_COR_COUNT) >> 38);
}
if (rec->mr_status & MC_STATUS_PCC)
printf("PCC ");
if (rec->mr_status & MC_STATUS_OVER)
@ -212,6 +239,9 @@ mca_log(const struct mca_record *rec)
case 0x0004:
printf("FRC error");
break;
case 0x0005:
printf("internal parity error");
break;
case 0x0400:
printf("internal timer error");
break;
@ -236,6 +266,17 @@ mca_log(const struct mca_record *rec)
break;
}
/* Memory controller error. */
if ((mca_error & 0xef80) == 0x0080) {
printf("%s channel ", mca_error_mmtype(mca_error));
if ((mca_error & 0x000f) != 0x000f)
printf("%d", mca_error & 0x000f);
else
printf("??");
printf(" memory error");
break;
}
/* Cache error. */
if ((mca_error & 0xef00) == 0x0100) {
printf("%sCACHE %s %s error",
@ -313,6 +354,11 @@ mca_check_status(int bank, struct mca_record *rec)
rec->mr_misc = rdmsr(MSR_MC_MISC(bank));
rec->mr_tsc = rdtsc();
rec->mr_apic_id = PCPU_GET(apic_id);
rec->mr_mcg_cap = rdmsr(MSR_MCG_CAP);
rec->mr_mcg_status = rdmsr(MSR_MCG_STATUS);
rec->mr_cpu_id = cpu_id;
rec->mr_cpu_vendor_id = cpu_vendor_id;
rec->mr_cpu = PCPU_GET(cpuid);
/*
* Clear machine check. Don't do this for uncorrectable

View File

@ -37,6 +37,11 @@ struct mca_record {
uint64_t mr_tsc;
int mr_apic_id;
int mr_bank;
uint64_t mr_mcg_cap;
uint64_t mr_mcg_status;
int mr_cpu_id;
int mr_cpu_vendor_id;
int mr_cpu;
};
#ifdef _KERNEL

View File

@ -273,6 +273,7 @@
#define MSR_MTRR16kBase 0x258
#define MSR_MTRR4kBase 0x268
#define MSR_PAT 0x277
#define MSR_MC0_CTL2 0x280
#define MSR_MTRRdefType 0x2ff
#define MSR_MC0_CTL 0x400
#define MSR_MC0_STATUS 0x401
@ -421,8 +422,10 @@
#define MCG_CAP_COUNT 0x000000ff
#define MCG_CAP_CTL_P 0x00000100
#define MCG_CAP_EXT_P 0x00000200
#define MCG_CAP_CMCI_P 0x00000400
#define MCG_CAP_TES_P 0x00000800
#define MCG_CAP_EXT_CNT 0x00ff0000
#define MCG_CAP_SER_P 0x01000000
#define MCG_STATUS_RIPV 0x00000001
#define MCG_STATUS_EIPV 0x00000002
#define MCG_STATUS_MCIP 0x00000004
@ -432,9 +435,14 @@
#define MSR_MC_STATUS(x) (MSR_MC0_STATUS + (x) * 4)
#define MSR_MC_ADDR(x) (MSR_MC0_ADDR + (x) * 4)
#define MSR_MC_MISC(x) (MSR_MC0_MISC + (x) * 4)
#define MSR_MC_CTL2(x) (MSR_MC0_CTL2 + (x)) /* If MCG_CAP_CMCI_P */
/*
 * Bit fields of a per-bank machine check status (MSR_MC_STATUS) value.
 * NOTE(review): the Intel SDM appears to gate the corrected error count
 * (bits 52:38) on MCG_CAP_CMCI_P and the AR/S bits (55/56) on MCG_CAP_SER_P,
 * which differs from the capabilities named in the comments below --
 * confirm against the SDM before relying on them.
 */
#define MC_STATUS_MCA_ERROR 0x000000000000ffffULL
#define MC_STATUS_MODEL_ERROR 0x00000000ffff0000ULL
#define MC_STATUS_OTHER_INFO 0x01ffffff00000000ULL
#define MC_STATUS_COR_COUNT 0x001fffc000000000ULL /* If MCG_CAP_TES_P */
#define MC_STATUS_TES_STATUS 0x0060000000000000ULL /* If MCG_CAP_TES_P */
#define MC_STATUS_AR 0x0080000000000000ULL /* If MCG_CAP_CMCI_P */
#define MC_STATUS_S 0x0100000000000000ULL /* If MCG_CAP_CMCI_P */
#define MC_STATUS_PCC 0x0200000000000000ULL
#define MC_STATUS_ADDRV 0x0400000000000000ULL
#define MC_STATUS_MISCV 0x0800000000000000ULL
@ -442,6 +450,10 @@
#define MC_STATUS_UC 0x2000000000000000ULL
#define MC_STATUS_OVER 0x4000000000000000ULL
#define MC_STATUS_VAL 0x8000000000000000ULL
#define MC_MISC_RA_LSB 0x000000000000003fULL /* If MCG_CAP_SER_P */
#define MC_MISC_ADDRESS_MODE 0x00000000000001c0ULL /* If MCG_CAP_SER_P */
#define MC_CTL2_THRESHOLD 0x0000000000003fffULL
#define MC_CTL2_CMCI_EN 0x0000000040000000ULL
/*
* The following four 3-byte registers control the non-cacheable regions.