Improve ioat(4) NUMA-awareness.

Allocate ioat->ring memory from the device domain.
Schedule ioat->poll_timer on the first CPU of the device domain.

According to the pcm-numa tool from the intel-pcm port, this reduces the number
of remote DRAM accesses while copying data by 75%.  Also, unless it is just
noise, I've noticed some speed improvement when copying data to another domain.

MFC after:	1 week
Sponsored by:	iXsystems, Inc.
This commit is contained in:
mav 2019-09-19 22:15:57 +00:00
parent 843a2cac89
commit 8000ef5c62
2 changed files with 14 additions and 5 deletions

View File

@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/domainset.h>
#include <sys/fail.h>
#include <sys/ioccom.h>
#include <sys/kernel.h>
@ -44,6 +45,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/rman.h>
#include <sys/sbuf.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
@ -266,6 +268,11 @@ ioat_attach(device_t device)
ioat = DEVICE2SOFTC(device);
ioat->device = device;
if (bus_get_domain(device, &ioat->domain) != 0)
ioat->domain = 0;
ioat->cpu = CPU_FFS(&cpuset_domain[ioat->domain]) - 1;
if (ioat->cpu < 0)
ioat->cpu = CPU_FIRST();
error = ioat_map_pci_bar(ioat);
if (error != 0)
@ -600,8 +607,8 @@ ioat3_attach(device_t device)
__func__, error);
return (error);
}
ioat->ring = malloc(num_descriptors * sizeof(*ring), M_IOAT,
M_ZERO | M_WAITOK);
ioat->ring = malloc_domainset(num_descriptors * sizeof(*ring), M_IOAT,
DOMAINSET_PREF(ioat->domain), M_ZERO | M_WAITOK);
ring = ioat->ring;
for (i = 0; i < num_descriptors; i++) {
@ -1107,8 +1114,8 @@ ioat_release(bus_dmaengine_t dmaengine)
(uint16_t)ioat->head);
if (!callout_pending(&ioat->poll_timer)) {
callout_reset(&ioat->poll_timer, 1,
ioat_poll_timer_callback, ioat);
callout_reset_on(&ioat->poll_timer, 1,
ioat_poll_timer_callback, ioat, ioat->cpu);
}
}
mtx_unlock(&ioat->submit_lock);
@ -1644,7 +1651,7 @@ ioat_free_ring(struct ioat_softc *ioat, uint32_t size,
struct ioat_descriptor *ring)
{
free(ring, M_IOAT);
free_domain(ring, M_IOAT);
}
static struct ioat_descriptor *

View File

@ -442,6 +442,8 @@ struct ioat_softc {
})
device_t device;
int domain;
int cpu;
int version;
unsigned chan_idx;