module/crypto: load balance QAT qp assignment
For the latest QAT devices there are 3 CPMs each with 16 VFs and 2 qp each VF. To load balance for multi-thread operations we want to assign each new queue pair (QP) on a per CPM (processing module) basis so this patch assigns the next QP, for QAT, by taking the last + 32 modulo the total number of QP. This will results in each new channel getting a QP on the next CPM. Signed-off-by: paul luse <paul.e.luse@intel.com> Change-Id: Iea608ada68517b6f2faecd45701c7aae6d23a2d0 Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/477082 Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com> Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Community-CI: SPDK CI Jenkins <sys_sgci@intel.com>
This commit is contained in:
parent
1a6878db61
commit
989261d6c3
@ -54,6 +54,13 @@
|
||||
#define MAX_NUM_DRV_TYPES 2
|
||||
#define AESNI_MB "crypto_aesni_mb"
|
||||
#define QAT "crypto_qat"
|
||||
/* The VF spread is the number of queue pairs between virtual functions, we use this to
|
||||
* load balance the QAT device.
|
||||
*/
|
||||
#define QAT_VF_SPREAD 32
|
||||
static uint8_t g_qat_total_qp = 0;
|
||||
static uint8_t g_next_qat_index;
|
||||
|
||||
const char *g_driver_names[MAX_NUM_DRV_TYPES] = { AESNI_MB, QAT };
|
||||
|
||||
/* Global list of available crypto devices. */
|
||||
@ -64,11 +71,15 @@ struct vbdev_dev {
|
||||
};
|
||||
static TAILQ_HEAD(, vbdev_dev) g_vbdev_devs = TAILQ_HEAD_INITIALIZER(g_vbdev_devs);
|
||||
|
||||
/* Global list and lock for unique device/queue pair combos */
|
||||
/* Global list and lock for unique device/queue pair combos. We keep 1 list per supported PMD
|
||||
* so that we can optimize per PMD where it make sense. For example, with QAT there an optimal
|
||||
* pattern for assigning queue pairs where with AESNI there is not.
|
||||
*/
|
||||
struct device_qp {
|
||||
struct vbdev_dev *device; /* ptr to crypto device */
|
||||
uint8_t qp; /* queue pair for this node */
|
||||
bool in_use; /* whether this node is in use or not */
|
||||
uint8_t index; /* used by QAT to load balance placement of qpairs */
|
||||
TAILQ_ENTRY(device_qp) link;
|
||||
};
|
||||
static TAILQ_HEAD(, device_qp) g_device_qp_qat = TAILQ_HEAD_INITIALIZER(g_device_qp_qat);
|
||||
@ -319,6 +330,9 @@ create_vbdev_dev(uint8_t index, uint16_t num_lcores)
|
||||
dev_qp->device = device;
|
||||
dev_qp->qp = j;
|
||||
dev_qp->in_use = false;
|
||||
if (strcmp(device->cdev_info.driver_name, QAT) == 0) {
|
||||
g_qat_total_qp++;
|
||||
}
|
||||
TAILQ_INSERT_TAIL(dev_qp_head, dev_qp, link);
|
||||
}
|
||||
|
||||
@ -346,10 +360,11 @@ static int
|
||||
vbdev_crypto_init_crypto_drivers(void)
|
||||
{
|
||||
uint8_t cdev_count;
|
||||
uint8_t cdev_id, i;
|
||||
int rc = 0;
|
||||
uint8_t cdev_id;
|
||||
int i, rc = 0;
|
||||
struct vbdev_dev *device;
|
||||
struct vbdev_dev *tmp_dev;
|
||||
struct device_qp *dev_qp;
|
||||
unsigned int max_sess_size = 0, sess_size;
|
||||
uint16_t num_lcores = rte_lcore_count();
|
||||
|
||||
@ -441,6 +456,15 @@ vbdev_crypto_init_crypto_drivers(void)
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
/* Assign index values to the QAT device qp nodes so that we can
|
||||
* assign them for optimal performance.
|
||||
*/
|
||||
i = 0;
|
||||
TAILQ_FOREACH(dev_qp, &g_device_qp_qat, link) {
|
||||
dev_qp->index = i++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
/* Error cleanup paths. */
|
||||
@ -1244,11 +1268,22 @@ _assign_device_qp(struct vbdev_crypto *crypto_bdev, struct device_qp *device_qp,
|
||||
{
|
||||
pthread_mutex_lock(&g_device_qp_lock);
|
||||
if (strcmp(crypto_bdev->drv_name, QAT) == 0) {
|
||||
/* For some QAT devices, the optimal qp to use is every 32nd as this spreads the
|
||||
* workload out over the multiple virtual functions in the device. For the devices
|
||||
* where this isn't the case, it doesn't hurt.
|
||||
*/
|
||||
TAILQ_FOREACH(device_qp, &g_device_qp_qat, link) {
|
||||
if (device_qp->index != g_next_qat_index) {
|
||||
continue;
|
||||
}
|
||||
if (device_qp->in_use == false) {
|
||||
crypto_ch->device_qp = device_qp;
|
||||
device_qp->in_use = true;
|
||||
g_next_qat_index = (g_next_qat_index + QAT_VF_SPREAD) % g_qat_total_qp;
|
||||
break;
|
||||
} else {
|
||||
/* if the preferred index is used, skip to the next one in this set. */
|
||||
g_next_qat_index = (g_next_qat_index + 1) % g_qat_total_qp;
|
||||
}
|
||||
}
|
||||
} else if (strcmp(crypto_bdev->drv_name, AESNI_MB) == 0) {
|
||||
|
@ -48,6 +48,9 @@ uint16_t g_dequeue_mock;
|
||||
uint16_t g_enqueue_mock;
|
||||
unsigned ut_rte_crypto_op_bulk_alloc;
|
||||
int ut_rte_crypto_op_attach_sym_session = 0;
|
||||
#define MOCK_INFO_GET_1QP_AESNI 0
|
||||
#define MOCK_INFO_GET_1QP_QAT 1
|
||||
#define MOCK_INFO_GET_1QP_BOGUS_PMD 2
|
||||
int ut_rte_cryptodev_info_get = 0;
|
||||
bool ut_rte_cryptodev_info_get_mocked = false;
|
||||
|
||||
@ -216,8 +219,14 @@ struct device_qp g_dev_qp;
|
||||
void
|
||||
rte_cryptodev_info_get(uint8_t dev_id, struct rte_cryptodev_info *dev_info)
|
||||
{
|
||||
dev_info->max_nb_queue_pairs = ut_rte_cryptodev_info_get;
|
||||
dev_info->driver_name = g_driver_names[0];
|
||||
dev_info->max_nb_queue_pairs = 1;
|
||||
if (ut_rte_cryptodev_info_get == MOCK_INFO_GET_1QP_AESNI) {
|
||||
dev_info->driver_name = g_driver_names[0];
|
||||
} else if (ut_rte_cryptodev_info_get == MOCK_INFO_GET_1QP_QAT) {
|
||||
dev_info->driver_name = g_driver_names[1];
|
||||
} else if (ut_rte_cryptodev_info_get == MOCK_INFO_GET_1QP_BOGUS_PMD) {
|
||||
dev_info->driver_name = "junk";
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int
|
||||
@ -710,6 +719,19 @@ test_reset(void)
|
||||
*/
|
||||
}
|
||||
|
||||
static void
|
||||
init_cleanup(void)
|
||||
{
|
||||
spdk_mempool_free(g_mbuf_mp);
|
||||
rte_mempool_free(g_session_mp);
|
||||
g_mbuf_mp = NULL;
|
||||
g_session_mp = NULL;
|
||||
if (g_session_mp_priv != NULL) {
|
||||
/* g_session_mp_priv may or may not be set depending on the DPDK version */
|
||||
rte_mempool_free(g_session_mp_priv);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
test_initdrivers(void)
|
||||
{
|
||||
@ -718,7 +740,6 @@ test_initdrivers(void)
|
||||
static struct rte_mempool *orig_session_mp;
|
||||
static struct rte_mempool *orig_session_mp_priv;
|
||||
|
||||
|
||||
/* These tests will alloc and free our g_mbuf_mp
|
||||
* so save that off here and restore it after each test is over.
|
||||
*/
|
||||
@ -773,7 +794,7 @@ test_initdrivers(void)
|
||||
|
||||
/* Test crypto dev configure failure. */
|
||||
MOCK_SET(rte_cryptodev_device_count_by_driver, 2);
|
||||
MOCK_SET(rte_cryptodev_info_get, 1);
|
||||
MOCK_SET(rte_cryptodev_info_get, MOCK_INFO_GET_1QP_AESNI);
|
||||
MOCK_SET(rte_cryptodev_configure, -1);
|
||||
MOCK_CLEARED_ASSERT(spdk_mempool_create);
|
||||
rc = vbdev_crypto_init_crypto_drivers();
|
||||
@ -803,18 +824,28 @@ test_initdrivers(void)
|
||||
CU_ASSERT(g_session_mp_priv == NULL);
|
||||
MOCK_SET(rte_cryptodev_start, 0);
|
||||
|
||||
/* Test happy path. */
|
||||
/* Test bogus PMD */
|
||||
MOCK_CLEARED_ASSERT(spdk_mempool_create);
|
||||
MOCK_SET(rte_cryptodev_info_get, MOCK_INFO_GET_1QP_BOGUS_PMD);
|
||||
rc = vbdev_crypto_init_crypto_drivers();
|
||||
CU_ASSERT(g_mbuf_mp == NULL);
|
||||
CU_ASSERT(g_session_mp == NULL);
|
||||
CU_ASSERT(rc == -EINVAL);
|
||||
|
||||
/* Test happy path QAT. */
|
||||
MOCK_CLEARED_ASSERT(spdk_mempool_create);
|
||||
MOCK_SET(rte_cryptodev_info_get, MOCK_INFO_GET_1QP_QAT);
|
||||
rc = vbdev_crypto_init_crypto_drivers();
|
||||
/* We don't have spdk_mempool_create mocked right now, so make sure to free the mempools. */
|
||||
CU_ASSERT(g_mbuf_mp != NULL);
|
||||
CU_ASSERT(g_session_mp != NULL);
|
||||
spdk_mempool_free(g_mbuf_mp);
|
||||
rte_mempool_free(g_session_mp);
|
||||
if (g_session_mp_priv != NULL) {
|
||||
/* g_session_mp_priv may or may not be set depending on the DPDK version */
|
||||
rte_mempool_free(g_session_mp_priv);
|
||||
}
|
||||
init_cleanup();
|
||||
CU_ASSERT(rc == 0);
|
||||
|
||||
/* Test happy path AESNI. */
|
||||
MOCK_CLEARED_ASSERT(spdk_mempool_create);
|
||||
MOCK_SET(rte_cryptodev_info_get, MOCK_INFO_GET_1QP_AESNI);
|
||||
rc = vbdev_crypto_init_crypto_drivers();
|
||||
init_cleanup();
|
||||
CU_ASSERT(rc == 0);
|
||||
|
||||
/* restore our initial values. */
|
||||
@ -951,6 +982,88 @@ test_poller(void)
|
||||
CU_ASSERT(rc == 2);
|
||||
}
|
||||
|
||||
/* Helper function for test_assign_device_qp() */
|
||||
static void
|
||||
_clear_device_qp_lists(void)
|
||||
{
|
||||
struct device_qp *device_qp = NULL;
|
||||
|
||||
while (!TAILQ_EMPTY(&g_device_qp_qat)) {
|
||||
device_qp = TAILQ_FIRST(&g_device_qp_qat);
|
||||
TAILQ_REMOVE(&g_device_qp_qat, device_qp, link);
|
||||
free(device_qp);
|
||||
|
||||
}
|
||||
CU_ASSERT(TAILQ_EMPTY(&g_device_qp_qat) == true);
|
||||
while (!TAILQ_EMPTY(&g_device_qp_aesni_mb)) {
|
||||
device_qp = TAILQ_FIRST(&g_device_qp_aesni_mb);
|
||||
TAILQ_REMOVE(&g_device_qp_aesni_mb, device_qp, link);
|
||||
free(device_qp);
|
||||
}
|
||||
CU_ASSERT(TAILQ_EMPTY(&g_device_qp_aesni_mb) == true);
|
||||
}
|
||||
|
||||
/* Helper function for test_assign_device_qp() */
|
||||
static void
|
||||
_check_expected_values(struct vbdev_crypto *crypto_bdev, struct device_qp *device_qp,
|
||||
struct crypto_io_channel *crypto_ch, uint8_t expected_index,
|
||||
uint8_t current_index)
|
||||
{
|
||||
_assign_device_qp(&g_crypto_bdev, device_qp, g_crypto_ch);
|
||||
CU_ASSERT(g_crypto_ch->device_qp->index == expected_index);
|
||||
CU_ASSERT(g_next_qat_index == current_index);
|
||||
}
|
||||
|
||||
static void
|
||||
test_assign_device_qp(void)
|
||||
{
|
||||
struct device_qp *device_qp = NULL;
|
||||
int i;
|
||||
|
||||
/* start with a known state, clear the device/qp lists */
|
||||
_clear_device_qp_lists();
|
||||
|
||||
/* make sure that one AESNI_MB qp is found */
|
||||
device_qp = calloc(1, sizeof(struct device_qp));
|
||||
TAILQ_INSERT_TAIL(&g_device_qp_aesni_mb, device_qp, link);
|
||||
g_crypto_ch->device_qp = NULL;
|
||||
g_crypto_bdev.drv_name = AESNI_MB;
|
||||
_assign_device_qp(&g_crypto_bdev, device_qp, g_crypto_ch);
|
||||
CU_ASSERT(g_crypto_ch->device_qp != NULL);
|
||||
|
||||
/* QAT testing is more complex as the code under test load balances by
|
||||
* assigning each subsequent device/qp to every QAT_VF_SPREAD modulo
|
||||
* g_qat_total_qp. For the current latest QAT we'll have 48 virtual functions
|
||||
* each with 2 qp so the "spread" betwen assignments is 32.
|
||||
*/
|
||||
g_qat_total_qp = 96;
|
||||
for (i = 0; i < g_qat_total_qp; i++) {
|
||||
device_qp = calloc(1, sizeof(struct device_qp));
|
||||
device_qp->index = i;
|
||||
TAILQ_INSERT_TAIL(&g_device_qp_qat, device_qp, link);
|
||||
}
|
||||
g_crypto_ch->device_qp = NULL;
|
||||
g_crypto_bdev.drv_name = QAT;
|
||||
|
||||
/* First assignment will assign to 0 and next at 32. */
|
||||
_check_expected_values(&g_crypto_bdev, device_qp, g_crypto_ch,
|
||||
0, QAT_VF_SPREAD);
|
||||
|
||||
/* Second assignment will assign to 32 and next at 64. */
|
||||
_check_expected_values(&g_crypto_bdev, device_qp, g_crypto_ch,
|
||||
QAT_VF_SPREAD, QAT_VF_SPREAD * 2);
|
||||
|
||||
/* Third assignment will assign to 64 and next at 0. */
|
||||
_check_expected_values(&g_crypto_bdev, device_qp, g_crypto_ch,
|
||||
QAT_VF_SPREAD * 2, 0);
|
||||
|
||||
/* Fourth assignment will assign to 1 and next at 33. */
|
||||
_check_expected_values(&g_crypto_bdev, device_qp, g_crypto_ch,
|
||||
1, QAT_VF_SPREAD + 1);
|
||||
|
||||
_clear_device_qp_lists();
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
@ -990,7 +1103,9 @@ main(int argc, char **argv)
|
||||
CU_add_test(suite, "test_reset",
|
||||
test_reset) == NULL ||
|
||||
CU_add_test(suite, "test_poller",
|
||||
test_poller) == NULL
|
||||
test_poller) == NULL ||
|
||||
CU_add_test(suite, "test_assign_device_qp",
|
||||
test_assign_device_qp) == NULL
|
||||
) {
|
||||
CU_cleanup_registry();
|
||||
return CU_get_error();
|
||||
|
Loading…
x
Reference in New Issue
Block a user