iflib:
Add internal tracking of SMP startup status to reliably figure out
which methods are to be used to get gtaskqueue up and running.

e1000:
Calculating this pointer gives undefined behaviour when (last == -1)
(it is before the buffer).  The pointer is always followed.  Panics
occurred when it pointed to an unmapped page.  Otherwise, the
pointed-to garbage tends to not have the E1000_TXD_STAT_DD bit set in
it, so in the broken case the loop was usually null and the function
just returned, and this was accidentally correct.

Submitted by:	bde
Reported by:	Matt Macy <mmacy@nextbsd.org>
parent 36fa5d5b64
commit bd84f70044
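The e1000 part of the fix is small enough to illustrate in isolation. A minimal sketch follows (illustrative names such as credits_update and struct desc, not the driver's actual code): merely forming &base[last] with last == -1 is undefined behaviour in C even if the pointer is never dereferenced, so the credit-update path must bail out before computing the EOP descriptor pointer, as the first hunk below does.

	#include <stdio.h>

	struct desc { unsigned int status; };	/* stand-in for a TX descriptor */

	static int
	credits_update(struct desc *base, int last)
	{
		int processed = 0;

		if (last == -1)			/* no EOP recorded yet: nothing to do */
			return (processed);
		struct desc *eop = &base[last];	/* pointer now always lies within the ring */
		(void)eop;	/* ... walk descriptors from eop, counting processed ... */
		return (processed);
	}

	int
	main(void)
	{
		struct desc ring[4] = {{0}};

		printf("%d\n", credits_update(ring, -1));	/* safe: prints 0 */
		return (0);
	}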
@@ -408,10 +408,13 @@ em_isc_txd_credits_update(void *arg, uint16_t txqid, uint32_t cidx_init, bool clear)
 	cidx = cidx_init;
 	buf = &txr->tx_buffers[cidx];
 	tx_desc = &txr->tx_base[cidx];
 	last = buf->eop;
+	if (last == -1)
+		return (processed);
 	eop_desc = &txr->tx_base[last];
-	DPRINTF(iflib_get_dev(adapter->ctx), "credits_update: cidx_init=%d clear=%d last=%d\n",
+	DPRINTF(iflib_get_dev(adapter->ctx),
+	    "credits_update: cidx_init=%d clear=%d last=%d\n",
 	    cidx_init, clear, last);
 	/*
 	 * What this does is get the index of the
@@ -420,7 +423,7 @@ em_isc_txd_credits_update(void *arg, uint16_t txqid, uint32_t cidx_init, bool clear)
 	 * simple comparison on the inner while loop.
 	 */
 	if (++last == scctx->isc_ntxd[0])
 		last = 0;
 	done = last;
@@ -630,6 +630,29 @@ taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
 
 	return (idx);
 }
+
+/*
+ * smp_started is unusable since it is not set for UP kernels or even for
+ * SMP kernels when there is 1 CPU.  This is usually handled by adding a
+ * (mp_ncpus == 1) test, but that would be broken here since we need
+ * to synchronize with the SI_SUB_SMP ordering.  Even in the pure SMP case
+ * smp_started only gives a fuzzy ordering relative to SI_SUB_SMP.
+ *
+ * So maintain our own flag.  It must be set after all CPUs are started
+ * and before SI_SUB_SMP:SI_ORDER_ANY so that the SYSINIT for delayed
+ * adjustment is properly delayed.  SI_ORDER_FOURTH is clearly before
+ * SI_ORDER_ANY and unclearly after the CPUs are started.  It would be
+ * simpler for adjustment to pass a flag indicating if it is delayed.
+ */
+static int tqg_smp_started;
+
+static void
+tqg_record_smp_started(void *arg)
+{
+	tqg_smp_started = 1;
+}
+
+SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH,
+	tqg_record_smp_started, NULL);
+
 void
 taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
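The completion-flag idiom this hunk introduces is small enough to state in isolation. A minimal sketch with illustrative names, assuming only the standard kernel headers:

	#include <sys/param.h>
	#include <sys/kernel.h>

	/*
	 * A flag that stays 0 through early boot and is flipped by a SYSINIT
	 * at a known point in the init order.  Per the comment above,
	 * SI_SUB_SMP:SI_ORDER_FOURTH is clearly before the SI_ORDER_ANY
	 * consumers in the same subsystem and (only unclearly) after the
	 * CPUs are started.
	 */
	static int example_smp_ready;

	static void
	example_record_smp_ready(void *arg)
	{
		example_smp_ready = 1;
	}
	SYSINIT(example_record_smp_ready, SI_SUB_SMP, SI_ORDER_FOURTH,
	    example_record_smp_ready, NULL);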
@@ -647,7 +670,7 @@ taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
 	qgroup->tqg_queue[qid].tgc_cnt++;
 	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
 	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
-	if (irq != -1 && (smp_started || mp_ncpus == 1)) {
+	if (irq != -1 && tqg_smp_started) {
 		gtask->gt_cpu = qgroup->tqg_queue[qid].tgc_cpu;
 		CPU_ZERO(&mask);
 		CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
@@ -697,7 +720,7 @@ taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
 	gtask->gt_irq = irq;
 	gtask->gt_cpu = cpu;
 	mtx_lock(&qgroup->tqg_lock);
-	if (smp_started || mp_ncpus == 1) {
+	if (tqg_smp_started) {
 		for (i = 0; i < qgroup->tqg_cnt; i++)
 			if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
 				qid = i;
@@ -731,7 +754,7 @@ taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask,
 	qid = -1;
 	irq = gtask->gt_irq;
 	cpu = gtask->gt_cpu;
-	MPASS(smp_started || mp_ncpus == 1);
+	MPASS(tqg_smp_started);
 	mtx_lock(&qgroup->tqg_lock);
 	for (i = 0; i < qgroup->tqg_cnt; i++)
 		if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
@@ -824,9 +847,10 @@ _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
 
 	mtx_assert(&qgroup->tqg_lock, MA_OWNED);
 
-	if (cnt < 1 || cnt * stride > mp_ncpus || (!smp_started && (mp_ncpus != 1))) {
-		printf("taskqgroup_adjust failed cnt: %d stride: %d mp_ncpus: %d smp_started: %d\n",
-			cnt, stride, mp_ncpus, smp_started);
+	if (cnt < 1 || cnt * stride > mp_ncpus || !tqg_smp_started) {
+		printf("%s: failed cnt: %d stride: %d "
+		    "mp_ncpus: %d smp_started: %d\n",
+		    __func__, cnt, stride, mp_ncpus, smp_started);
 		return (EINVAL);
 	}
 	if (qgroup->tqg_adjusting) {
@@ -1193,13 +1193,36 @@ iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count)
 		iflib_dma_free(*dmaiter);
 }
 
+#ifdef EARLY_AP_STARTUP
+static const int iflib_started = 1;
+#else
+/*
+ * We used to abuse the smp_started flag to decide if the queues have been
+ * fully initialized (by late taskqgroup_adjust() calls in a SYSINIT()).
+ * That gave bad races, since the SYSINIT() runs strictly after smp_started
+ * is set.  Run a SYSINIT() strictly after that to just set a usable
+ * completion flag.
+ */
+static int iflib_started;
+
+static void
+iflib_record_started(void *arg)
+{
+	iflib_started = 1;
+}
+
+SYSINIT(iflib_record_started, SI_SUB_SMP + 1, SI_ORDER_FIRST,
+	iflib_record_started, NULL);
+#endif
+
 static int
 iflib_fast_intr(void *arg)
 {
 	iflib_filter_info_t info = arg;
 	struct grouptask *gtask = info->ifi_task;
 
-	if (!smp_started && mp_ncpus > 1)
+	if (!iflib_started)
 		return (FILTER_HANDLED);
 
 	DBG_COUNTER_INC(fast_intrs);
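The `!iflib_started` test above is the "null handling" that the comment added in the next hunk refers to: until startup completes, the filter claims the interrupt without touching the (not yet initialized) queues. A minimal sketch of that idiom, with illustrative names; my_started stands in for a flag set by a SYSINIT as shown earlier:

	#include <sys/param.h>
	#include <sys/bus.h>

	static int my_started;	/* illustrative: flipped by a SYSINIT after SI_SUB_SMP */

	static int
	my_fast_intr(void *arg __unused)
	{
		/* Too early: swallow the interrupt so nothing touches the queues. */
		if (!my_started)
			return (FILTER_HANDLED);
		/* ... acknowledge hardware, then hand off to the grouptask ... */
		return (FILTER_SCHEDULE_THREAD);
	}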
@@ -3728,7 +3751,16 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct
 		device_printf(dev, "qset structure setup failed %d\n", err);
 		goto fail_queues;
 	}
+
+	/*
+	 * Group taskqueues aren't properly set up until SMP is started,
+	 * so we disable interrupts until we can handle them post
+	 * SI_SUB_SMP.
+	 *
+	 * XXX: disabling interrupts doesn't actually work, at least for
+	 * the non-MSI case.  When they occur before SI_SUB_SMP completes,
+	 * we do null handling and depend on this not causing too large an
+	 * interrupt storm.
+	 */
 	IFDI_INTR_DISABLE(ctx);
 	if (msix > 1 && (err = IFDI_MSIX_INTR_ASSIGN(ctx, msix)) != 0) {
 		device_printf(dev, "IFDI_MSIX_INTR_ASSIGN failed %d\n", err);
@@ -4556,13 +4588,6 @@ iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int *
 	void *q;
 	int err;
 
-	/*
-	 * group taskqueues aren't properly set up until SMP is started
-	 * so we disable interrupts until we can handle them post
-	 * SI_SUB_SMP
-	 */
-	IFDI_INTR_DISABLE(ctx);
-
 	q = &ctx->ifc_rxqs[0];
 	info = &rxq[0].ifr_filter_info;
 	gtask = &rxq[0].ifr_task;
@@ -81,7 +81,7 @@ int taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride);
 	extern struct taskqgroup *qgroup_##name
 
 
-#if (!defined(SMP) || defined(EARLY_AP_STARTUP))
+#ifdef EARLY_AP_STARTUP
 #define TASKQGROUP_DEFINE(name, cnt, stride)				\
 									\
 struct taskqgroup *qgroup_##name;					\
@@ -95,7 +95,8 @@ taskqgroup_define_##name(void *arg)					\
 									\
 SYSINIT(taskqgroup_##name, SI_SUB_INIT_IF, SI_ORDER_FIRST,		\
 	taskqgroup_define_##name, NULL)
-#else /* SMP && !EARLY_AP_STARTUP */
+
+#else /* !EARLY_AP_STARTUP */
 #define TASKQGROUP_DEFINE(name, cnt, stride)				\
 									\
 struct taskqgroup *qgroup_##name;					\
@@ -104,15 +105,6 @@ static void								\
 taskqgroup_define_##name(void *arg)					\
 {									\
 	qgroup_##name = taskqgroup_create(#name);			\
-	/* Adjustment will be null unless smp_cpus == 1. */		\
-	/*								\
-	 * XXX this was intended to fix the smp_cpus == 1 case, but	\
-	 * doesn't actually work for that.  It gives the same strange	\
-	 * panic as adjustment at SI_SUB_INIT_IF:SI_ORDER_ANY for a	\
-	 * device that works with a pure UP kernel.			\
-	 */								\
-	/* XXX this code is common now, so should not be ifdefed. */	\
-	taskqgroup_adjust(qgroup_##name, (cnt), (stride));		\
 }									\
 									\
 SYSINIT(taskqgroup_##name, SI_SUB_INIT_IF, SI_ORDER_FIRST,		\
@@ -121,17 +113,13 @@ SYSINIT(taskqgroup_##name, SI_SUB_INIT_IF, SI_ORDER_FIRST,	\
 static void								\
 taskqgroup_adjust_##name(void *arg)					\
 {									\
-	/*								\
-	 * Adjustment when smp_cpus > 1 only works accidentally		\
-	 * (when there is no device interrupt before adjustment).	\
-	 */								\
 	taskqgroup_adjust(qgroup_##name, (cnt), (stride));		\
 }									\
 									\
 SYSINIT(taskqgroup_adj_##name, SI_SUB_SMP, SI_ORDER_ANY,		\
 	taskqgroup_adjust_##name, NULL);				\
 
-#endif /* !SMP || EARLY_AP_STARTUP */
+#endif /* EARLY_AP_STARTUP */
 
 TASKQGROUP_DECLARE(net);
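For context, a usage sketch of the macro pair whose #ifdef condition changed above (the subsystem name "mysub", the softc sc, and my_task_fn are illustrative): under EARLY_AP_STARTUP the single SI_SUB_INIT_IF SYSINIT both creates and adjusts the group, while otherwise the adjustment is re-issued by the deferred SI_SUB_SMP:SI_ORDER_ANY SYSINIT.

	/* In a header shared by the subsystem's consumers: */
	TASKQGROUP_DECLARE(mysub);

	/* In exactly one .c file; cnt/stride here spread one queue per CPU: */
	TASKQGROUP_DEFINE(mysub, mp_ncpus, 1);

	/* Later, e.g. from a driver, bind a grouptask into the group: */
	GROUPTASK_INIT(&sc->my_task, 0, my_task_fn, sc);
	taskqgroup_attach(qgroup_mysub, &sc->my_task, sc, -1, "mysub task");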
|
Loading…
x
Reference in New Issue
Block a user