Add sysctl node for ENA IO queues number adjustment

By default, in ena_attach() the driver attempts to acquire
ena_adapter::max_num_io_queues MSI-X vectors for the purpose of IO
queues, however this is not guaranteed. The number of vectors acquired
depends also on system resources availability.

Regardless of that, enable the number of effectively used IO queues to
be further limited through the sysctl node.

Example: Assuming that there are 8 IO queues configured by default, the
command

$ sysctl dev.ena.0.io_queues_nb=4

will reduce the number of available IO queues to 4. Similarly, the value
can also be increased up to the maximum supported value. A value higher than
the maximum supported number of IO queues is ignored. Zero is ignored too.

Submitted by:  Maciej Bielski <mba@semihalf.com>
Obtained from: Semihalf
Sponsored by:  Amazon, Inc.
This commit is contained in:
Marcin Wojtas 2020-05-26 15:57:02 +00:00
parent e2735b095b
commit 56d41ad5fe
3 changed files with 130 additions and 1 deletions

View File

@ -1238,6 +1238,61 @@ ena_update_queue_size(struct ena_adapter *adapter, uint32_t new_tx_size,
return (rc);
}
/*
 * Rebuild the IO rings of the adapter for a new number of IO queues.
 *
 * The steps are performed in this exact order:
 *   1. free the resources of all IO rings,
 *   2. destroy the RSS state of the underlying ena_com device,
 *   3. store `num` in adapter->num_io_queues,
 *   4. call ena_init_io_rings() for the adapter.
 *
 * `num` is stored as given; no range check is done here.
 */
static void
ena_update_io_rings(struct ena_adapter *adapter, uint32_t num)
{
	struct ena_com_dev *ena_dev = adapter->ena_dev;

	ena_free_all_io_rings_resources(adapter);

	/* Destroying RSS forces the indirection table to be reinitialized */
	ena_com_rss_destroy(ena_dev);

	adapter->num_io_queues = num;
	ena_init_io_rings(adapter);
}
/*
 * Switch the adapter to `new_num` IO queues.
 *
 * The caller is responsible for sanitizing `new_num`; it is not validated
 * here.
 *
 * The whole operation runs with the adapter lock held. The device is brought
 * down unconditionally and the IO rings are rebuilt for `new_num` queues. The
 * device is brought up again only if ENA_FLAG_DEV_UP was set on entry.
 *
 * If bringing the device up fails, the rings are rebuilt for the previous
 * number of queues and ena_up() is retried. If that fails as well,
 * ENA_FLAG_DEV_UP_BEFORE_RESET is set and a device reset is triggered.
 *
 * Returns 0 if the device was not up on entry, otherwise the result of the
 * last ena_up() call.
 *
 * NOTE(review): when the first ena_up() fails but the revert succeeds, the
 * returned value is 0, so the caller cannot tell that the requested number of
 * queues was not applied - confirm this is intended.
 */
int
ena_update_io_queue_nb(struct ena_adapter *adapter, uint32_t new_num)
{
	uint32_t prev_num;
	bool was_running;
	int rc = 0;

	ENA_LOCK_LOCK(adapter);

	/* Snapshot the state that is needed to restore the old setup. */
	was_running = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
	prev_num = adapter->num_io_queues;

	ena_down(adapter);
	ena_update_io_rings(adapter, new_num);

	/* A device that was down stays down; nothing more to do. */
	if (!was_running)
		goto unlock;

	rc = ena_up(adapter);
	if (unlikely(rc != 0)) {
		device_printf(adapter->pdev,
		    "Failed to configure device with %u IO queues. "
		    "Reverting to previous value: %u\n",
		    new_num, prev_num);

		/* Fall back to the configuration that was in use before. */
		ena_update_io_rings(adapter, prev_num);

		rc = ena_up(adapter);
		if (unlikely(rc != 0)) {
			device_printf(adapter->pdev,
			    "Failed to revert to previous setup IO "
			    "queues. Triggering device reset.\n");
			ENA_FLAG_SET_ATOMIC(
			    ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
			ena_trigger_reset(adapter,
			    ENA_REGS_RESET_OS_TRIGGER);
		}
	}

unlock:
	ENA_LOCK_UNLOCK(adapter);

	return (rc);
}
static void
ena_free_rx_bufs(struct ena_adapter *adapter, unsigned int qid)
{
@ -1865,6 +1920,18 @@ ena_rss_configure(struct ena_adapter *adapter)
struct ena_com_dev *ena_dev = adapter->ena_dev;
int rc;
/* In case the RSS table was destroyed */
if (!ena_dev->rss.tbl_log_size) {
rc = ena_rss_init_default(adapter);
if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
device_printf(adapter->pdev,
"WARNING: RSS was not properly re-initialized,"
" it will affect bandwidth\n");
ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_RSS_ACTIVE, adapter);
return (rc);
}
}
/* Set indirect table */
rc = ena_com_indirect_table_set(ena_dev);
if (unlikely((rc != 0) && (rc != EOPNOTSUPP)))
@ -1890,8 +1957,11 @@ ena_up_complete(struct ena_adapter *adapter)
if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
rc = ena_rss_configure(adapter);
if (rc != 0)
if (rc != 0) {
device_printf(adapter->pdev,
"Failed to configure RSS\n");
return (rc);
}
}
rc = ena_change_mtu(adapter->ifp, adapter->ifp->if_mtu);

View File

@ -501,6 +501,7 @@ int ena_update_buf_ring_size(struct ena_adapter *adapter,
uint32_t new_buf_ring_size);
int ena_update_queue_size(struct ena_adapter *adapter, uint32_t new_tx_size,
uint32_t new_rx_size);
int ena_update_io_queue_nb(struct ena_adapter *adapter, uint32_t new_num);
static inline void
ena_trigger_reset(struct ena_adapter *adapter,

View File

@ -37,6 +37,7 @@ static void ena_sysctl_add_stats(struct ena_adapter *);
static void ena_sysctl_add_tuneables(struct ena_adapter *);
static int ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS);
static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
"ENA driver parameters");
@ -316,6 +317,11 @@ ena_sysctl_add_tuneables(struct ena_adapter *adapter)
CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
ena_sysctl_rx_queue_size, "I",
"Size of the Rx ring. The size should be a power of 2.");
/* Tuneable number of IO queues */
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "io_queues_nb",
CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
ena_sysctl_io_queues_nb, "I", "Number of IO queues.");
}
@ -403,3 +409,55 @@ ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS)
return (error);
}
/*
* Change number of effectively used IO queues adapter->num_io_queues
*/
static int
ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS)
{
struct ena_adapter *adapter = arg1;
uint32_t tmp = 0;
int error;
error = sysctl_wire_old_buffer(req, sizeof(tmp));
if (error == 0) {
tmp = adapter->num_io_queues;
error = sysctl_handle_int(oidp, &tmp, 0, req);
}
if (error != 0 || req->newptr == NULL)
return (error);
if (tmp == 0) {
device_printf(adapter->pdev,
"Requested number of IO queues is zero\n");
return (EINVAL);
}
/*
* The adapter::max_num_io_queues is the HW capability. The system
* resources availability may potentially be a tighter limit. Therefore
* the relation `adapter::max_num_io_queues >= adapter::msix_vecs`
* always holds true, while the `adapter::msix_vecs` is variable across
* device reset (`ena_destroy_device()` + `ena_restore_device()`).
*/
if (tmp > (adapter->msix_vecs - ENA_ADMIN_MSIX_VEC)) {
device_printf(adapter->pdev,
"Requested number of IO queues is higher than maximum "
"allowed (%u)\n", adapter->msix_vecs - ENA_ADMIN_MSIX_VEC);
return (EINVAL);
}
if (tmp == adapter->num_io_queues) {
device_printf(adapter->pdev,
"Requested number of IO queues is equal to current value "
"(%u)\n", adapter->num_io_queues);
} else {
device_printf(adapter->pdev,
"Requested new number of IO queues: %u, current value: "
"%u\n", tmp, adapter->num_io_queues);
error = ena_update_io_queue_nb(adapter, tmp);
}
return (error);
}