- Extend the machine check record structure to include several fields useful
  for parsing model-specific and other fields in machine check events,
  including the global machine check capabilities and status registers, CPU
  identification, and the FreeBSD CPU ID.
- Report these added fields in the console log of a machine check so that a
  record structure can be reconstituted from the console messages.
- Parse new architectural errors, including memory controller errors.

MFC after: 1 week
This commit is contained in:
parent
c998036d71
commit
a311ca2f45
@ -186,19 +186,46 @@ mca_error_request(uint16_t mca_error)
|
||||
return ("???");
|
||||
}
|
||||
|
||||
/*
 * Return a short mnemonic for the 3-bit field held in bits 6:4 of an MCA
 * error code: 0 -> "GEN", 1 -> "RD", 2 -> "WR", 3 -> "AC", 4 -> "MS".
 * Field values 5 through 7 have no case and yield "???".  mca_log() prints
 * the result ahead of "channel" when reporting a memory controller error.
 * NOTE(review): the mnemonics presumably stand for generic, read, write,
 * address/command and memory scrubbing, as in the Intel SDM's memory
 * controller error encoding -- confirm against the SDM.
 */
static const char *
|
||||
mca_error_mmtype(uint16_t mca_error)
|
||||
{
|
||||
|
||||
/* Isolate bits 6:4 and shift them down to a 0..7 selector. */
switch ((mca_error & 0x70) >> 4) {
|
||||
case 0x0:
|
||||
return ("GEN");
|
||||
case 0x1:
|
||||
return ("RD");
|
||||
case 0x2:
|
||||
return ("WR");
|
||||
case 0x3:
|
||||
return ("AC");
|
||||
case 0x4:
|
||||
return ("MS");
|
||||
}
|
||||
/* Selector values without a case above fall through to here. */
return ("???");
|
||||
}
|
||||
|
||||
/* Dump details about a single machine check. */
|
||||
static void __nonnull(1)
|
||||
mca_log(const struct mca_record *rec)
|
||||
{
|
||||
uint16_t mca_error;
|
||||
|
||||
printf("MCA: bank %d, status 0x%016llx\n", rec->mr_bank,
|
||||
printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank,
|
||||
(long long)rec->mr_status);
|
||||
printf("MCA: CPU %d ", rec->mr_apic_id);
|
||||
printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n",
|
||||
(long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status);
|
||||
printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor,
|
||||
rec->mr_cpu_id, rec->mr_apic_id);
|
||||
printf("MCA: CPU %d ", rec->mr_cpu);
|
||||
if (rec->mr_status & MC_STATUS_UC)
|
||||
printf("UNCOR ");
|
||||
else
|
||||
else {
|
||||
printf("COR ");
|
||||
if (rec->mr_mcg_cap & MCG_CAP_TES_P)
|
||||
printf("(%lld) ", ((long long)rec->mr_status &
|
||||
MC_STATUS_COR_COUNT) >> 38);
|
||||
}
|
||||
if (rec->mr_status & MC_STATUS_PCC)
|
||||
printf("PCC ");
|
||||
if (rec->mr_status & MC_STATUS_OVER)
|
||||
@ -221,6 +248,9 @@ mca_log(const struct mca_record *rec)
|
||||
case 0x0004:
|
||||
printf("FRC error");
|
||||
break;
|
||||
case 0x0005:
|
||||
printf("internal parity error");
|
||||
break;
|
||||
case 0x0400:
|
||||
printf("internal timer error");
|
||||
break;
|
||||
@ -245,6 +275,17 @@ mca_log(const struct mca_record *rec)
|
||||
break;
|
||||
}
|
||||
|
||||
/* Memory controller error. */
|
||||
if ((mca_error & 0xef80) == 0x0080) {
|
||||
printf("%s channel ", mca_error_mmtype(mca_error));
|
||||
if ((mca_error & 0x000f) != 0x000f)
|
||||
printf("%d", mca_error & 0x000f);
|
||||
else
|
||||
printf("??");
|
||||
printf(" memory error");
|
||||
break;
|
||||
}
|
||||
|
||||
/* Cache error. */
|
||||
if ((mca_error & 0xef00) == 0x0100) {
|
||||
printf("%sCACHE %s %s error",
|
||||
@ -322,6 +363,11 @@ mca_check_status(int bank, struct mca_record *rec)
|
||||
rec->mr_misc = rdmsr(MSR_MC_MISC(bank));
|
||||
rec->mr_tsc = rdtsc();
|
||||
rec->mr_apic_id = PCPU_GET(apic_id);
|
||||
rec->mr_mcg_cap = rdmsr(MSR_MCG_CAP);
|
||||
rec->mr_mcg_status = rdmsr(MSR_MCG_STATUS);
|
||||
rec->mr_cpu_id = cpu_id;
|
||||
rec->mr_cpu_vendor_id = cpu_vendor_id;
|
||||
rec->mr_cpu = PCPU_GET(cpuid);
|
||||
|
||||
/*
|
||||
* Clear machine check. Don't do this for uncorrectable
|
||||
|
@ -37,6 +37,11 @@ struct mca_record {
|
||||
uint64_t mr_tsc;
|
||||
int mr_apic_id;
|
||||
int mr_bank;
|
||||
uint64_t mr_mcg_cap;
|
||||
uint64_t mr_mcg_status;
|
||||
int mr_cpu_id;
|
||||
int mr_cpu_vendor_id;
|
||||
int mr_cpu;
|
||||
};
|
||||
|
||||
#ifdef _KERNEL
|
||||
|
@ -267,6 +267,7 @@
|
||||
#define MSR_MTRR16kBase 0x258
|
||||
#define MSR_MTRR4kBase 0x268
|
||||
#define MSR_PAT 0x277
|
||||
#define MSR_MC0_CTL2 0x280
|
||||
#define MSR_MTRRdefType 0x2ff
|
||||
#define MSR_MC0_CTL 0x400
|
||||
#define MSR_MC0_STATUS 0x401
|
||||
@ -352,8 +353,10 @@
|
||||
#define MCG_CAP_COUNT 0x000000ff
|
||||
#define MCG_CAP_CTL_P 0x00000100
|
||||
#define MCG_CAP_EXT_P 0x00000200
|
||||
#define MCG_CAP_CMCI_P 0x00000400
|
||||
#define MCG_CAP_TES_P 0x00000800
|
||||
#define MCG_CAP_EXT_CNT 0x00ff0000
|
||||
#define MCG_CAP_SER_P 0x01000000
|
||||
#define MCG_STATUS_RIPV 0x00000001
|
||||
#define MCG_STATUS_EIPV 0x00000002
|
||||
#define MCG_STATUS_MCIP 0x00000004
|
||||
@ -363,9 +366,14 @@
|
||||
#define MSR_MC_STATUS(x) (MSR_MC0_STATUS + (x) * 4)
|
||||
#define MSR_MC_ADDR(x) (MSR_MC0_ADDR + (x) * 4)
|
||||
#define MSR_MC_MISC(x) (MSR_MC0_MISC + (x) * 4)
|
||||
#define MSR_MC_CTL2(x) (MSR_MC0_CTL2 + (x)) /* If MCG_CAP_CMCI_P */
|
||||
#define MC_STATUS_MCA_ERROR 0x000000000000ffffUL
|
||||
#define MC_STATUS_MODEL_ERROR 0x00000000ffff0000UL
|
||||
#define MC_STATUS_OTHER_INFO 0x01ffffff00000000UL
|
||||
#define MC_STATUS_COR_COUNT 0x001fffc000000000UL /* If MCG_CAP_TES_P */
|
||||
#define MC_STATUS_TES_STATUS 0x0060000000000000UL /* If MCG_CAP_TES_P */
|
||||
#define MC_STATUS_AR 0x0080000000000000UL /* If MCG_CAP_CMCI_P */
|
||||
#define MC_STATUS_S 0x0100000000000000UL /* If MCG_CAP_CMCI_P */
|
||||
#define MC_STATUS_PCC 0x0200000000000000UL
|
||||
#define MC_STATUS_ADDRV 0x0400000000000000UL
|
||||
#define MC_STATUS_MISCV 0x0800000000000000UL
|
||||
@ -373,6 +381,10 @@
|
||||
#define MC_STATUS_UC 0x2000000000000000UL
|
||||
#define MC_STATUS_OVER 0x4000000000000000UL
|
||||
#define MC_STATUS_VAL 0x8000000000000000UL
|
||||
#define MC_MISC_RA_LSB 0x000000000000003fUL /* If MCG_CAP_SER_P */
|
||||
#define MC_MISC_ADDRESS_MODE 0x00000000000001c0UL /* If MCG_CAP_SER_P */
|
||||
#define MC_CTL2_THRESHOLD 0x0000000000003fffUL
|
||||
#define MC_CTL2_CMCI_EN 0x0000000040000000UL
|
||||
|
||||
/*
|
||||
* The following four 3-byte registers control the non-cacheable regions.
|
||||
|
@ -177,19 +177,46 @@ mca_error_request(uint16_t mca_error)
|
||||
return ("???");
|
||||
}
|
||||
|
||||
/*
 * Return a short mnemonic for the 3-bit field held in bits 6:4 of an MCA
 * error code: 0 -> "GEN", 1 -> "RD", 2 -> "WR", 3 -> "AC", 4 -> "MS".
 * Field values 5 through 7 have no case and yield "???".  mca_log() prints
 * the result ahead of "channel" when reporting a memory controller error.
 * NOTE(review): the mnemonics presumably stand for generic, read, write,
 * address/command and memory scrubbing, as in the Intel SDM's memory
 * controller error encoding -- confirm against the SDM.
 */
static const char *
|
||||
mca_error_mmtype(uint16_t mca_error)
|
||||
{
|
||||
|
||||
/* Isolate bits 6:4 and shift them down to a 0..7 selector. */
switch ((mca_error & 0x70) >> 4) {
|
||||
case 0x0:
|
||||
return ("GEN");
|
||||
case 0x1:
|
||||
return ("RD");
|
||||
case 0x2:
|
||||
return ("WR");
|
||||
case 0x3:
|
||||
return ("AC");
|
||||
case 0x4:
|
||||
return ("MS");
|
||||
}
|
||||
/* Selector values without a case above fall through to here. */
return ("???");
|
||||
}
|
||||
|
||||
/* Dump details about a single machine check. */
|
||||
static void __nonnull(1)
|
||||
mca_log(const struct mca_record *rec)
|
||||
{
|
||||
uint16_t mca_error;
|
||||
|
||||
printf("MCA: bank %d, status 0x%016llx\n", rec->mr_bank,
|
||||
printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank,
|
||||
(long long)rec->mr_status);
|
||||
printf("MCA: CPU %d ", rec->mr_apic_id);
|
||||
printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n",
|
||||
(long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status);
|
||||
printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor,
|
||||
rec->mr_cpu_id, rec->mr_apic_id);
|
||||
printf("MCA: CPU %d ", rec->mr_cpu);
|
||||
if (rec->mr_status & MC_STATUS_UC)
|
||||
printf("UNCOR ");
|
||||
else
|
||||
else {
|
||||
printf("COR ");
|
||||
if (rec->mr_mcg_cap & MCG_CAP_TES_P)
|
||||
printf("(%lld) ", ((long long)rec->mr_status &
|
||||
MC_STATUS_COR_COUNT) >> 38);
|
||||
}
|
||||
if (rec->mr_status & MC_STATUS_PCC)
|
||||
printf("PCC ");
|
||||
if (rec->mr_status & MC_STATUS_OVER)
|
||||
@ -212,6 +239,9 @@ mca_log(const struct mca_record *rec)
|
||||
case 0x0004:
|
||||
printf("FRC error");
|
||||
break;
|
||||
case 0x0005:
|
||||
printf("internal parity error");
|
||||
break;
|
||||
case 0x0400:
|
||||
printf("internal timer error");
|
||||
break;
|
||||
@ -236,6 +266,17 @@ mca_log(const struct mca_record *rec)
|
||||
break;
|
||||
}
|
||||
|
||||
/* Memory controller error. */
|
||||
if ((mca_error & 0xef80) == 0x0080) {
|
||||
printf("%s channel ", mca_error_mmtype(mca_error));
|
||||
if ((mca_error & 0x000f) != 0x000f)
|
||||
printf("%d", mca_error & 0x000f);
|
||||
else
|
||||
printf("??");
|
||||
printf(" memory error");
|
||||
break;
|
||||
}
|
||||
|
||||
/* Cache error. */
|
||||
if ((mca_error & 0xef00) == 0x0100) {
|
||||
printf("%sCACHE %s %s error",
|
||||
@ -313,6 +354,11 @@ mca_check_status(int bank, struct mca_record *rec)
|
||||
rec->mr_misc = rdmsr(MSR_MC_MISC(bank));
|
||||
rec->mr_tsc = rdtsc();
|
||||
rec->mr_apic_id = PCPU_GET(apic_id);
|
||||
rec->mr_mcg_cap = rdmsr(MSR_MCG_CAP);
|
||||
rec->mr_mcg_status = rdmsr(MSR_MCG_STATUS);
|
||||
rec->mr_cpu_id = cpu_id;
|
||||
rec->mr_cpu_vendor_id = cpu_vendor_id;
|
||||
rec->mr_cpu = PCPU_GET(cpuid);
|
||||
|
||||
/*
|
||||
* Clear machine check. Don't do this for uncorrectable
|
||||
|
@ -37,6 +37,11 @@ struct mca_record {
|
||||
uint64_t mr_tsc;
|
||||
int mr_apic_id;
|
||||
int mr_bank;
|
||||
uint64_t mr_mcg_cap;
|
||||
uint64_t mr_mcg_status;
|
||||
int mr_cpu_id;
|
||||
int mr_cpu_vendor_id;
|
||||
int mr_cpu;
|
||||
};
|
||||
|
||||
#ifdef _KERNEL
|
||||
|
@ -273,6 +273,7 @@
|
||||
#define MSR_MTRR16kBase 0x258
|
||||
#define MSR_MTRR4kBase 0x268
|
||||
#define MSR_PAT 0x277
|
||||
#define MSR_MC0_CTL2 0x280
|
||||
#define MSR_MTRRdefType 0x2ff
|
||||
#define MSR_MC0_CTL 0x400
|
||||
#define MSR_MC0_STATUS 0x401
|
||||
@ -421,8 +422,10 @@
|
||||
#define MCG_CAP_COUNT 0x000000ff
|
||||
#define MCG_CAP_CTL_P 0x00000100
|
||||
#define MCG_CAP_EXT_P 0x00000200
|
||||
#define MCG_CAP_CMCI_P 0x00000400
|
||||
#define MCG_CAP_TES_P 0x00000800
|
||||
#define MCG_CAP_EXT_CNT 0x00ff0000
|
||||
#define MCG_CAP_SER_P 0x01000000
|
||||
#define MCG_STATUS_RIPV 0x00000001
|
||||
#define MCG_STATUS_EIPV 0x00000002
|
||||
#define MCG_STATUS_MCIP 0x00000004
|
||||
@ -432,9 +435,14 @@
|
||||
#define MSR_MC_STATUS(x) (MSR_MC0_STATUS + (x) * 4)
|
||||
#define MSR_MC_ADDR(x) (MSR_MC0_ADDR + (x) * 4)
|
||||
#define MSR_MC_MISC(x) (MSR_MC0_MISC + (x) * 4)
|
||||
#define MSR_MC_CTL2(x) (MSR_MC0_CTL2 + (x)) /* If MCG_CAP_CMCI_P */
|
||||
#define MC_STATUS_MCA_ERROR 0x000000000000ffffULL
|
||||
#define MC_STATUS_MODEL_ERROR 0x00000000ffff0000ULL
|
||||
#define MC_STATUS_OTHER_INFO 0x01ffffff00000000ULL
|
||||
#define MC_STATUS_COR_COUNT 0x001fffc000000000ULL /* If MCG_CAP_TES_P */
|
||||
#define MC_STATUS_TES_STATUS 0x0060000000000000ULL /* If MCG_CAP_TES_P */
|
||||
#define MC_STATUS_AR 0x0080000000000000ULL /* If MCG_CAP_CMCI_P */
|
||||
#define MC_STATUS_S 0x0100000000000000ULL /* If MCG_CAP_CMCI_P */
|
||||
#define MC_STATUS_PCC 0x0200000000000000ULL
|
||||
#define MC_STATUS_ADDRV 0x0400000000000000ULL
|
||||
#define MC_STATUS_MISCV 0x0800000000000000ULL
|
||||
@ -442,6 +450,10 @@
|
||||
#define MC_STATUS_UC 0x2000000000000000ULL
|
||||
#define MC_STATUS_OVER 0x4000000000000000ULL
|
||||
#define MC_STATUS_VAL 0x8000000000000000ULL
|
||||
#define MC_MISC_RA_LSB 0x000000000000003fULL /* If MCG_CAP_SER_P */
|
||||
#define MC_MISC_ADDRESS_MODE 0x00000000000001c0ULL /* If MCG_CAP_SER_P */
|
||||
#define MC_CTL2_THRESHOLD 0x0000000000003fffULL
|
||||
#define MC_CTL2_CMCI_EN 0x0000000040000000ULL
|
||||
|
||||
/*
|
||||
* The following four 3-byte registers control the non-cacheable regions.
|
||||
|
Loading…
Reference in New Issue
Block a user