vhost: interrupts coalescing

The Virtio spec says that any IRQ requests are only hints. So try to limit
the number of interrupts generated by vhost by defining a minimum interval
between IRQs. Coalescing is disabled by default and can be enabled
using the RPC command 'set_vhost_controller_coalescing'.

Change-Id: I9b96014d004ea0ea022b4498c6b47d30d867091a
Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
Reviewed-on: https://review.gerrithub.io/378130
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
This commit is contained in:
Pawel Wodkowski 2017-09-11 19:45:56 +02:00 committed by Daniel Verkamp
parent fb0b17a19f
commit ff1863f428
10 changed files with 346 additions and 14 deletions

View File

@ -106,6 +106,28 @@ const char *spdk_vhost_dev_get_name(struct spdk_vhost_dev *vdev);
*/
uint64_t spdk_vhost_dev_get_cpumask(struct spdk_vhost_dev *vdev);
/**
* By default, events are generated when asked, but for high queue depth and
* high IOPS this proves to be inefficient, both for the guest kernel, which
* has to handle many more I/O completions, and for SPDK vhost, which needs to
* make more syscalls. If enabled, this limits the number of events (IRQs) sent
* to the initiator by SPDK vhost, effectively coalescing several completions.
* This of course introduces an I/O latency penalty proportional to the event
* delay time.
*
* The actual event delay time is calculated according to the formula below:
* if (delay_base == 0 || IOPS < iops_threshold) {
* delay = 0;
* } else {
* delay = delay_base * (IOPS - iops_threshold) / iops_threshold;
* }
*
* \param vdev vhost device
* \param delay_base_us Base delay time in microseconds. If 0, coalescing is disabled.
* \param iops_threshold IOPS threshold when coalescing is activated
*/
int spdk_vhost_set_coalescing(struct spdk_vhost_dev *vdev, uint32_t delay_base_us,
uint32_t iops_threshold);
/**
* Construct an empty vhost SCSI device. This will create a
* Unix domain socket together with a vhost-user slave server waiting

View File

@ -151,6 +151,117 @@ spdk_vhost_vq_get_desc(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue
return 0;
}
/*
 * Send an IRQ (via the call eventfd) for one virtqueue, but only if some
 * requests were placed on its used ring since the last signal.
 *
 * Returns 1 if an interrupt was sent, 0 otherwise.
 */
int
spdk_vhost_vq_used_signal(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue *vq)
{
	if (vq->used_req_cnt == 0) {
		/* Nothing pending - no interrupt needed. */
		return 0;
	}

	/* Fold the per-IRQ counter into the per-stats-window counter and reset it. */
	vq->req_cnt += vq->used_req_cnt;
	vq->used_req_cnt = 0;

	SPDK_DEBUGLOG(SPDK_TRACE_VHOST_RING,
		      "Queue %td - USED RING: sending IRQ: last used %"PRIu16"\n",
		      vq - vdev->virtqueue, vq->vring.last_used_idx);

	eventfd_write(vq->vring.callfd, (eventfd_t)1);
	return 1;
}
/*
 * Every stats_check_interval ticks, recompute the IRQ delay for each
 * virtqueue from the number of requests completed since the last check.
 * Queues at or below the IO-rate threshold are left untouched.
 *
 * \param vdev vhost device
 * \param now current tick count (from spdk_get_ticks())
 */
static void
check_dev_io_stats(struct spdk_vhost_dev *vdev, uint64_t now)
{
	struct spdk_vhost_virtqueue *virtqueue;
	uint32_t irq_delay_base = vdev->coalescing_delay_time_base;
	uint32_t io_threshold = vdev->coalescing_io_rate_threshold;
	uint32_t irq_delay, req_cnt;
	uint16_t q_idx;

	if (now < vdev->next_stats_check_time) {
		return;
	}

	vdev->next_stats_check_time = now + vdev->stats_check_interval;
	for (q_idx = 0; q_idx < vdev->num_queues; q_idx++) {
		virtqueue = &vdev->virtqueue[q_idx];

		req_cnt = virtqueue->req_cnt + virtqueue->used_req_cnt;
		if (req_cnt <= io_threshold) {
			continue;
		}

		/* Delay grows linearly with how far the IO rate exceeds the
		 * threshold. */
		irq_delay = (irq_delay_base * (req_cnt - io_threshold)) / io_threshold;
		/* Bug fix: the previous spdk_min(0, irq_delay) always produced
		 * 0, silently disabling coalescing. irq_delay is unsigned, so
		 * it is already clamped at 0 - assign it directly. */
		virtqueue->irq_delay_time = irq_delay;

		virtqueue->req_cnt = 0;
		virtqueue->next_event_time = now;
	}
}
/*
 * Send IRQs for every virtqueue of the device that needs one, honoring the
 * configured interrupt coalescing. Called from the device's poller loop
 * after the request queues have been processed.
 */
void
spdk_vhost_dev_used_signal(struct spdk_vhost_dev *vdev)
{
	struct spdk_vhost_virtqueue *virtqueue;
	uint64_t now;
	uint16_t q_idx;

	if (vdev->coalescing_delay_time_base == 0) {
		/* Coalescing disabled: signal each queue immediately unless the
		 * guest suppressed interrupts via VRING_AVAIL_F_NO_INTERRUPT. */
		for (q_idx = 0; q_idx < vdev->num_queues; q_idx++) {
			virtqueue = &vdev->virtqueue[q_idx];
			if (virtqueue->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) {
				continue;
			}
			spdk_vhost_vq_used_signal(vdev, virtqueue);
		}
	} else {
		now = spdk_get_ticks();
		/* Refresh each queue's irq_delay_time from its recent IO rate
		 * (no-op until the next stats-check interval elapses). */
		check_dev_io_stats(vdev, now);
		for (q_idx = 0; q_idx < vdev->num_queues; q_idx++) {
			virtqueue = &vdev->virtqueue[q_idx];
			/* No need for event right now */
			if (now < virtqueue->next_event_time ||
			    (virtqueue->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
				continue;
			}
			if (!spdk_vhost_vq_used_signal(vdev, virtqueue)) {
				/* Nothing was signalled; keep the old deadline. */
				continue;
			}
			/* Syscall is quite long so update time */
			now = spdk_get_ticks();
			virtqueue->next_event_time = now + virtqueue->irq_delay_time;
		}
	}
}
/*
 * Configure interrupt coalescing for a vhost device.
 *
 * \param vdev vhost device
 * \param delay_base_us base IRQ delay in microseconds; 0 disables coalescing
 * \param iops_threshold IOPS level at which coalescing starts to kick in
 * \return 0 on success, -EINVAL on out-of-range parameters
 */
int
spdk_vhost_set_coalescing(struct spdk_vhost_dev *vdev, uint32_t delay_base_us,
			  uint32_t iops_threshold)
{
	/* Convert microseconds to CPU ticks. spdk_get_ticks_hz() returns a
	 * 64-bit value, so the arithmetic is done in 64 bits. */
	uint64_t delay_time_base = delay_base_us * spdk_get_ticks_hz() / 1000000ULL;
	/* Scale the per-second threshold down to one stats-check interval. */
	uint32_t io_rate = iops_threshold * SPDK_VHOST_DEV_STATS_CHECK_INTERVAL_MS / 1000;

	if (delay_time_base >= UINT32_MAX) {
		SPDK_ERRLOG("Delay time of %"PRIu32" is too big\n", delay_base_us);
		return -EINVAL;
	} else if (io_rate == 0) {
		/* Report the caller-supplied threshold, not the scaled io_rate,
		 * which is always 0 on this path and would be meaningless. */
		SPDK_ERRLOG("IOPS threshold of %"PRIu32" is too low. Min is %u\n", iops_threshold,
			    1000U / SPDK_VHOST_DEV_STATS_CHECK_INTERVAL_MS);
		return -EINVAL;
	}

	vdev->coalescing_delay_time_base = delay_time_base;
	vdev->coalescing_io_rate_threshold = io_rate;
	return 0;
}
/*
* Enqueue id and len to used ring.
*/
@ -158,13 +269,13 @@ void
spdk_vhost_vq_used_ring_enqueue(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue *virtqueue,
uint16_t id, uint32_t len)
{
int need_event = 0;
struct rte_vhost_vring *vring = &virtqueue->vring;
struct vring_used *used = vring->used;
uint16_t last_idx = vring->last_used_idx & (vring->size - 1);
SPDK_DEBUGLOG(SPDK_TRACE_VHOST_RING, "USED: last_idx=%"PRIu16" req id=%"PRIu16" len=%"PRIu32"\n",
vring->last_used_idx, id, len);
SPDK_DEBUGLOG(SPDK_TRACE_VHOST_RING,
"Queue %td - USED RING: last_idx=%"PRIu16" req id=%"PRIu16" len=%"PRIu32"\n",
virtqueue - vdev->virtqueue, vring->last_used_idx, id, len);
vring->last_used_idx++;
used->ring[last_idx].id = id;
@ -173,16 +284,13 @@ spdk_vhost_vq_used_ring_enqueue(struct spdk_vhost_dev *vdev, struct spdk_vhost_v
spdk_wmb();
* (volatile uint16_t *) &used->idx = vring->last_used_idx;
if (spdk_vhost_dev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
spdk_unlikely(vring->avail->idx == vring->last_avail_idx)) {
need_event = 1;
} else {
spdk_mb();
need_event = !(vring->avail->flags & VRING_AVAIL_F_NO_INTERRUPT);
}
virtqueue->used_req_cnt++;
if (need_event) {
eventfd_write(vring->callfd, (eventfd_t)1);
/* We need to signal every last_used_idx overflow. */
if (vring->last_used_idx == 0 ||
(spdk_vhost_dev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
spdk_unlikely(vring->avail->idx == vring->last_avail_idx))) {
spdk_vhost_vq_used_signal(vdev, virtqueue);
}
}
@ -465,6 +573,12 @@ spdk_vhost_dev_construct(struct spdk_vhost_dev *vdev, const char *name, const ch
vdev->type = type;
vdev->backend = backend;
spdk_vhost_set_coalescing(vdev, SPDK_VHOST_COALESCING_DELAY_BASE_US,
SPDK_VHOST_VQ_IOPS_COALESCING_THRESHOLD);
vdev->next_stats_check_time = 0;
vdev->stats_check_interval = SPDK_VHOST_DEV_STATS_CHECK_INTERVAL_MS * spdk_get_ticks_hz() /
1000UL;
g_spdk_vhost_devices[ctrlr_num] = vdev;
if (rte_vhost_driver_start(path) != 0) {

View File

@ -326,6 +326,8 @@ vdev_worker(void *arg)
for (q_idx = 0; q_idx < bvdev->vdev.num_queues; q_idx++) {
process_vq(bvdev, &bvdev->vdev.virtqueue[q_idx]);
}
spdk_vhost_dev_used_signal(&bvdev->vdev);
}
static void
@ -357,6 +359,8 @@ no_bdev_vdev_worker(void *arg)
for (q_idx = 0; q_idx < bvdev->vdev.num_queues; q_idx++) {
no_bdev_process_vq(bvdev, &bvdev->vdev.virtqueue[q_idx]);
}
spdk_vhost_dev_used_signal(&bvdev->vdev);
}
static struct spdk_vhost_blk_dev *
@ -522,11 +526,17 @@ destroy_device_poller_cb(void *arg)
{
struct spdk_vhost_dev_destroy_ctx *ctx = arg;
struct spdk_vhost_blk_dev *bvdev = ctx->bvdev;
int i;
if (bvdev->vdev.task_cnt > 0) {
return;
}
for (i = 0; i < bvdev->vdev.num_queues; i++) {
bvdev->vdev.virtqueue[i].next_event_time = 0;
spdk_vhost_vq_used_signal(&bvdev->vdev, &bvdev->vdev.virtqueue[i]);
}
SPDK_NOTICELOG("Stopping poller for vhost controller %s\n", bvdev->vdev.name);
if (bvdev->bdev_io_channel) {

View File

@ -67,6 +67,22 @@
#define SPDK_VHOST_IOVS_MAX 128
/*
* Rate at which stats are checked for interrupt coalescing.
*/
#define SPDK_VHOST_DEV_STATS_CHECK_INTERVAL_MS 10
/*
* Default threshold at which interrupts start to be coalesced.
*/
#define SPDK_VHOST_VQ_IOPS_COALESCING_THRESHOLD 60000
/*
* Currently coalescing is not used by default.
* Setting this to value > 0 here or by RPC will enable coalescing.
*/
#define SPDK_VHOST_COALESCING_DELAY_BASE_US 0
#define SPDK_VHOST_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \
(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
(1ULL << VIRTIO_F_VERSION_1) | \
@ -79,13 +95,26 @@
(1ULL << VIRTIO_RING_F_INDIRECT_DESC))
enum spdk_vhost_dev_type {
SPDK_VHOST_DEV_T_SCSI,
SPDK_VHOST_DEV_T_BLK,
SPDK_VHOST_DEV_T_SCSI,//!< SPDK_VHOST_DEV_T_SCSI
SPDK_VHOST_DEV_T_BLK, //!< SPDK_VHOST_DEV_T_BLK
};
struct spdk_vhost_virtqueue {
	struct rte_vhost_vring vring;
	void *tasks;

	/* Requests completed since the last coalescing stats check;
	 * reset by check_dev_io_stats(). */
	uint32_t req_cnt;

	/* Requests enqueued to the used ring since the last IRQ was sent;
	 * reset by spdk_vhost_vq_used_signal(). */
	uint16_t used_req_cnt;

	/* Current IRQ delay, in ticks; 0 means signal immediately. */
	uint32_t irq_delay_time;

	/* Earliest tick at which the next IRQ may be sent. */
	uint64_t next_event_time;
	/* Use the documented __attribute__ spelling (the bare __attribute
	 * form is an undocumented compiler alias). */
} __attribute__((aligned(SPDK_CACHE_LINE_SIZE)));
struct spdk_vhost_dev_backend {
@ -118,8 +147,21 @@ struct spdk_vhost_dev {
enum spdk_vhost_dev_type type;
const struct spdk_vhost_dev_backend *backend;
uint32_t coalescing_delay_time_base;
/* Threshold when event coalescing for virtqueue will be turned on. */
uint32_t coalescing_io_rate_threshold;
/* Next time when stats for event coalescing will be checked. */
uint64_t next_stats_check_time;
/* Interval used for event coalescing checking. */
uint64_t stats_check_interval;
uint16_t num_queues;
uint64_t negotiated_features;
struct spdk_vhost_virtqueue virtqueue[SPDK_VHOST_MAX_VQUEUES];
};
@ -153,6 +195,25 @@ bool spdk_vhost_vq_should_notify(struct spdk_vhost_dev *vdev, struct spdk_vhost_
int spdk_vhost_vq_get_desc(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue *vq,
uint16_t req_idx, struct vring_desc **desc, struct vring_desc **desc_table,
uint32_t *desc_table_size);
/**
* Send IRQ/call client (if pending) for \c vq.
* \param vdev vhost device
* \param vq virtqueue
* \return
* 0 - if no interrupt was signalled
* 1 - if interrupt was signalled
*/
int spdk_vhost_vq_used_signal(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue *vq);
/**
* Send IRQs for all queues that need to be signaled.
* \param vdev vhost device
*/
void spdk_vhost_dev_used_signal(struct spdk_vhost_dev *vdev);
void spdk_vhost_vq_used_ring_enqueue(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue *vq,
uint16_t id, uint32_t len);

View File

@ -531,4 +531,96 @@ spdk_rpc_get_vhost_controllers(struct spdk_jsonrpc_request *request,
}
SPDK_RPC_REGISTER("get_vhost_controllers", spdk_rpc_get_vhost_controllers)
/* Context carried from the JSON-RPC request to the vhost external-event
 * callback; freed by free_rpc_set_vhost_controllers_event_coalescing(). */
struct rpc_vhost_ctrlr_coalescing {
char *ctrlr;              /* controller name; allocated by spdk_json_decode_string */
uint32_t delay_base_us;
uint32_t iops_threshold;
struct spdk_jsonrpc_request *request;
};
/* Decoder table mapping RPC parameter names onto the struct fields above. */
static const struct spdk_json_object_decoder rpc_set_vhost_ctrlr_coalescing[] = {
{"ctrlr", offsetof(struct rpc_vhost_ctrlr_coalescing, ctrlr), spdk_json_decode_string },
{"delay_base_us", offsetof(struct rpc_vhost_ctrlr_coalescing, delay_base_us), spdk_json_decode_uint32},
{"iops_threshold", offsetof(struct rpc_vhost_ctrlr_coalescing, iops_threshold), spdk_json_decode_uint32},
};
/* Release an RPC context; tolerates NULL so error paths can call it
 * unconditionally. */
static void
free_rpc_set_vhost_controllers_event_coalescing(struct rpc_vhost_ctrlr_coalescing *req)
{
	if (req == NULL) {
		return;
	}

	free(req->ctrlr);
	free(req);
}
/*
 * External-event callback: runs on the vhost core once the controller has
 * been looked up. Applies the coalescing settings and completes the JSON-RPC
 * request (success or error). Always consumes and frees \c arg.
 */
static int
spdk_rpc_set_vhost_controller_coalescing_cb(struct spdk_vhost_dev *vdev, void *arg)
{
	struct rpc_vhost_ctrlr_coalescing *req = arg;
	struct spdk_json_write_ctx *w;
	char err_buf[64];
	int rc;

	/* A NULL vdev means the named controller was not found. */
	rc = (vdev == NULL) ? -ENODEV :
	     spdk_vhost_set_coalescing(vdev, req->delay_base_us, req->iops_threshold);

	if (rc != 0) {
		spdk_strerror_r(-rc, err_buf, sizeof(err_buf));
		spdk_jsonrpc_send_error_response(req->request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, err_buf);
		free_rpc_set_vhost_controllers_event_coalescing(req);
		return 0;
	}

	w = spdk_jsonrpc_begin_result(req->request);
	if (w != NULL) {
		spdk_json_write_bool(w, true);
		spdk_jsonrpc_end_result(req->request, w);
	}

	free_rpc_set_vhost_controllers_event_coalescing(req);
	return 0;
}
/*
 * RPC entry point for 'set_vhost_controller_coalescing': decode the request
 * parameters and dispatch them to the vhost core via an external event.
 */
static void
spdk_rpc_set_vhost_controller_coalescing(struct spdk_jsonrpc_request *request,
		const struct spdk_json_val *params)
{
	struct rpc_vhost_ctrlr_coalescing *req;
	char err_buf[64];
	int rc = 0;

	req = calloc(1, sizeof(*req));
	if (req == NULL) {
		rc = -ENOMEM;
	} else if (spdk_json_decode_object(params, rpc_set_vhost_ctrlr_coalescing,
					   SPDK_COUNTOF(rpc_set_vhost_ctrlr_coalescing), req)) {
		SPDK_DEBUGLOG(SPDK_TRACE_VHOST_RPC, "spdk_json_decode_object failed\n");
		rc = -EINVAL;
	}

	if (rc != 0) {
		spdk_strerror_r(-rc, err_buf, sizeof(err_buf));
		free_rpc_set_vhost_controllers_event_coalescing(req);
		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, err_buf);
		return;
	}

	req->request = request;
	/* Ownership of req passes to the callback, which frees it. */
	spdk_vhost_call_external_event(req->ctrlr, spdk_rpc_set_vhost_controller_coalescing_cb, req);
}
SPDK_RPC_REGISTER("set_vhost_controller_coalescing", spdk_rpc_set_vhost_controller_coalescing)
SPDK_LOG_REGISTER_TRACE_FLAG("vhost_rpc", SPDK_TRACE_VHOST_RPC)

View File

@ -643,7 +643,10 @@ vdev_mgmt_worker(void *arg)
struct spdk_vhost_scsi_dev *svdev = arg;
process_removed_devs(svdev);
spdk_vhost_vq_used_signal(&svdev->vdev, &svdev->vdev.virtqueue[VIRTIO_SCSI_EVENTQ]);
process_controlq(svdev, &svdev->vdev.virtqueue[VIRTIO_SCSI_CONTROLQ]);
spdk_vhost_vq_used_signal(&svdev->vdev, &svdev->vdev.virtqueue[VIRTIO_SCSI_CONTROLQ]);
}
static void
@ -655,6 +658,8 @@ vdev_worker(void *arg)
for (q_idx = VIRTIO_SCSI_REQUESTQ; q_idx < svdev->vdev.num_queues; q_idx++) {
process_requestq(svdev, &svdev->vdev.virtqueue[q_idx]);
}
spdk_vhost_dev_used_signal(&svdev->vdev);
}
static struct spdk_vhost_scsi_dev *
@ -1071,6 +1076,11 @@ destroy_device_poller_cb(void *arg)
return;
}
for (i = 0; i < svdev->vdev.num_queues; i++) {
spdk_vhost_vq_used_signal(&svdev->vdev, &svdev->vdev.virtqueue[i]);
}
for (i = 0; i < SPDK_VHOST_SCSI_CTRLR_MAX_DEVS; i++) {
if (svdev->scsi_dev[i] == NULL) {
continue;

View File

@ -592,6 +592,20 @@ p = subparsers.add_parser('kill_instance', help='Send signal to instance')
p.add_argument('sig_name', help='signal will be sent to server.')
p.set_defaults(func=kill_instance)
def set_vhost_controller_coalescing(args):
    """Invoke the set_vhost_controller_coalescing RPC with the parsed CLI args."""
    jsonrpc_call('set_vhost_controller_coalescing', {
        'ctrlr': args.ctrlr,
        'delay_base_us': args.delay_base_us,
        'iops_threshold': args.iops_threshold,
    })
# CLI wiring for the set_vhost_controller_coalescing RPC defined above.
p = subparsers.add_parser('set_vhost_controller_coalescing', help='Set vhost controller coalescing')
p.add_argument('ctrlr', help='controller name')
p.add_argument('delay_base_us', help='Base delay time', type=int)
p.add_argument('iops_threshold', help='IOPS threshold when coalescing is enabled', type=int)
p.set_defaults(func=set_vhost_controller_coalescing)
def construct_vhost_scsi_controller(args):
params = {'ctrlr': args.ctrlr}

View File

@ -48,6 +48,9 @@ DEFINE_STUB(spdk_mem_unregister, int, (void *vaddr, size_t len), 0);
DEFINE_STUB(spdk_app_get_core_mask, uint64_t, (void), 0);
DEFINE_STUB_V(spdk_app_stop, (int rc));
DEFINE_STUB_V(spdk_event_call, (struct spdk_event *event));
DEFINE_STUB_V(spdk_poller_register, (struct spdk_poller **ppoller, spdk_poller_fn fn, void *arg,
uint32_t lcore, uint64_t period_microseconds));
DEFINE_STUB_V(spdk_poller_unregister, (struct spdk_poller **ppoller, struct spdk_event *complete));
DEFINE_STUB(spdk_iommu_mem_unregister, int, (uint64_t addr, uint64_t len), 0);
DEFINE_STUB(rte_vhost_get_mem_table, int, (int vid, struct rte_vhost_memory **mem), 0);
DEFINE_STUB(rte_vhost_get_negotiated_features, int, (int vid, uint64_t *features), 0);

View File

@ -66,6 +66,9 @@ DEFINE_STUB_V(spdk_vhost_vq_used_ring_enqueue, (struct spdk_vhost_dev *vdev,
DEFINE_STUB(spdk_vhost_vq_get_desc, int, (struct spdk_vhost_dev *vdev,
struct spdk_vhost_virtqueue *vq, uint16_t req_idx, struct vring_desc **desc,
struct vring_desc **desc_table, uint32_t *desc_table_size), 0);
DEFINE_STUB(spdk_vhost_vq_used_signal, int, (struct spdk_vhost_dev *vdev,
struct spdk_vhost_virtqueue *virtqueue), 0);
DEFINE_STUB_V(spdk_vhost_dev_used_signal, (struct spdk_vhost_dev *vdev));
DEFINE_STUB(spdk_vhost_vring_desc_is_wr, bool, (struct vring_desc *cur_desc), false);
DEFINE_STUB(spdk_vhost_vring_desc_to_iov, int, (struct spdk_vhost_dev *vdev, struct iovec *iov,
uint16_t *iov_index, const struct vring_desc *desc), 0);

View File

@ -62,6 +62,9 @@ DEFINE_STUB(spdk_vhost_vq_avail_ring_get, uint16_t, (struct spdk_vhost_virtqueue
DEFINE_STUB(spdk_vhost_vq_get_desc, int, (struct spdk_vhost_dev *vdev,
struct spdk_vhost_virtqueue *vq, uint16_t req_idx, struct vring_desc **desc,
struct vring_desc **desc_table, uint32_t *desc_table_size), 0);
DEFINE_STUB(spdk_vhost_vq_used_signal, int, (struct spdk_vhost_dev *vdev,
struct spdk_vhost_virtqueue *virtqueue), 0);
DEFINE_STUB_V(spdk_vhost_dev_used_signal, (struct spdk_vhost_dev *vdev));
DEFINE_STUB_VP(spdk_vhost_gpa_to_vva, (struct spdk_vhost_dev *vdev, uint64_t addr), {0});
DEFINE_STUB_V(spdk_vhost_vq_used_ring_enqueue, (struct spdk_vhost_dev *vdev,
struct spdk_vhost_virtqueue *vq, uint16_t id, uint32_t len));