module/crypto: load balance QAT qp assignment

For the latest QAT devices there are 3 CPMs (processing modules),
each with 16 VFs and 2 qps per VF. To load balance multi-threaded
operations we want to assign each new queue pair (QP) on a per-CPM
basis, so this patch assigns the next QP, for QAT, by taking the
last index + 32 modulo the total number of QPs. This results in each
new channel getting a QP on the next CPM.
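
As a sketch of the resulting order, assume the 96 QP case above
(3 CPMs x 16 VFs x 2 qps) and the patch's behavior of skipping ahead
by one when the preferred QP is already taken. This standalone
illustration (not the patch code itself) prints qp 0, 32, 64, 1, 33,
65, i.e. assignments rotate across the three CPMs:

    #include <stdbool.h>
    #include <stdio.h>

    int main(void)
    {
        /* spread/total mirror QAT_VF_SPREAD (32) and g_qat_total_qp (96). */
        bool used[96] = { false };
        unsigned next = 0, spread = 32, total = 96;
        int ch;

        for (ch = 0; ch < 6; ch++) {
            while (used[next]) {
                next = (next + 1) % total; /* preferred QP taken, skip ahead */
            }
            used[next] = true;
            printf("channel %d -> qp %u\n", ch, next);
            next = (next + spread) % total; /* jump to the next CPM */
        }
        return 0;
    }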

Signed-off-by: paul luse <paul.e.luse@intel.com>
Change-Id: Iea608ada68517b6f2faecd45701c7aae6d23a2d0
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/477082
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: SPDK CI Jenkins <sys_sgci@intel.com>

@@ -54,6 +54,13 @@
#define MAX_NUM_DRV_TYPES 2
#define AESNI_MB "crypto_aesni_mb"
#define QAT "crypto_qat"
/* The VF spread is the number of queue pairs between virtual functions; we use this to
 * load balance the QAT device.
 */
#define QAT_VF_SPREAD 32
static uint8_t g_qat_total_qp = 0;
static uint8_t g_next_qat_index;
const char *g_driver_names[MAX_NUM_DRV_TYPES] = { AESNI_MB, QAT };
/* Global list of available crypto devices. */
@@ -64,11 +71,15 @@ struct vbdev_dev {
};
static TAILQ_HEAD(, vbdev_dev) g_vbdev_devs = TAILQ_HEAD_INITIALIZER(g_vbdev_devs);
/* Global list and lock for unique device/queue pair combos */
/* Global list and lock for unique device/queue pair combos. We keep 1 list per supported PMD
 * so that we can optimize per PMD where it makes sense. For example, with QAT there is an optimal
 * pattern for assigning queue pairs whereas with AESNI there is not.
 */
struct device_qp {
struct vbdev_dev *device; /* ptr to crypto device */
uint8_t qp; /* queue pair for this node */
bool in_use; /* whether this node is in use or not */
uint8_t index; /* used by QAT to load balance placement of qpairs */
TAILQ_ENTRY(device_qp) link;
};
static TAILQ_HEAD(, device_qp) g_device_qp_qat = TAILQ_HEAD_INITIALIZER(g_device_qp_qat);
@@ -319,6 +330,9 @@ create_vbdev_dev(uint8_t index, uint16_t num_lcores)
dev_qp->device = device;
dev_qp->qp = j;
dev_qp->in_use = false;
if (strcmp(device->cdev_info.driver_name, QAT) == 0) {
g_qat_total_qp++;
}
TAILQ_INSERT_TAIL(dev_qp_head, dev_qp, link);
}
@@ -346,10 +360,11 @@ static int
vbdev_crypto_init_crypto_drivers(void)
{
uint8_t cdev_count;
uint8_t cdev_id, i;
int rc = 0;
uint8_t cdev_id;
int i, rc = 0;
struct vbdev_dev *device;
struct vbdev_dev *tmp_dev;
struct device_qp *dev_qp;
unsigned int max_sess_size = 0, sess_size;
uint16_t num_lcores = rte_lcore_count();
@@ -441,6 +456,15 @@ vbdev_crypto_init_crypto_drivers(void)
goto err;
}
}
/* Assign index values to the QAT device qp nodes so that we can
 * select them for optimal performance.
 */
i = 0;
TAILQ_FOREACH(dev_qp, &g_device_qp_qat, link) {
dev_qp->index = i++;
}
return 0;
/* Error cleanup paths. */
@@ -1244,11 +1268,22 @@ _assign_device_qp(struct vbdev_crypto *crypto_bdev, struct device_qp *device_qp,
{
pthread_mutex_lock(&g_device_qp_lock);
if (strcmp(crypto_bdev->drv_name, QAT) == 0) {
/* For some QAT devices, the optimal qp to use is every 32nd as this spreads the
* workload out over the multiple virtual functions in the device. For the devices
* where this isn't the case, it doesn't hurt.
*/
TAILQ_FOREACH(device_qp, &g_device_qp_qat, link) {
if (device_qp->index != g_next_qat_index) {
continue;
}
if (device_qp->in_use == false) {
crypto_ch->device_qp = device_qp;
device_qp->in_use = true;
g_next_qat_index = (g_next_qat_index + QAT_VF_SPREAD) % g_qat_total_qp;
break;
} else {
/* If the preferred index is in use, skip to the next one in this set. */
g_next_qat_index = (g_next_qat_index + 1) % g_qat_total_qp;
}
}
} else if (strcmp(crypto_bdev->drv_name, AESNI_MB) == 0) {
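
For contrast, the AESNI_MB branch that this hunk truncates needs no
index bookkeeping because, per the comment above, there is no optimal
assignment pattern for that PMD. A minimal sketch of what that branch
can look like (the rest of the function is not shown in this diff, so
this is illustrative rather than the exact code):

    TAILQ_FOREACH(device_qp, &g_device_qp_aesni_mb, link) {
        if (device_qp->in_use == false) {
            crypto_ch->device_qp = device_qp;
            device_qp->in_use = true;
            break;
        }
    }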

@@ -48,6 +48,9 @@ uint16_t g_dequeue_mock;
uint16_t g_enqueue_mock;
unsigned ut_rte_crypto_op_bulk_alloc;
int ut_rte_crypto_op_attach_sym_session = 0;
#define MOCK_INFO_GET_1QP_AESNI 0
#define MOCK_INFO_GET_1QP_QAT 1
#define MOCK_INFO_GET_1QP_BOGUS_PMD 2
int ut_rte_cryptodev_info_get = 0;
bool ut_rte_cryptodev_info_get_mocked = false;
@@ -216,8 +219,14 @@ struct device_qp g_dev_qp;
void
rte_cryptodev_info_get(uint8_t dev_id, struct rte_cryptodev_info *dev_info)
{
dev_info->max_nb_queue_pairs = ut_rte_cryptodev_info_get;
dev_info->driver_name = g_driver_names[0];
dev_info->max_nb_queue_pairs = 1;
if (ut_rte_cryptodev_info_get == MOCK_INFO_GET_1QP_AESNI) {
dev_info->driver_name = g_driver_names[0];
} else if (ut_rte_cryptodev_info_get == MOCK_INFO_GET_1QP_QAT) {
dev_info->driver_name = g_driver_names[1];
} else if (ut_rte_cryptodev_info_get == MOCK_INFO_GET_1QP_BOGUS_PMD) {
dev_info->driver_name = "junk";
}
}
unsigned int
@@ -710,6 +719,19 @@ test_reset(void)
*/
}
static void
init_cleanup(void)
{
spdk_mempool_free(g_mbuf_mp);
rte_mempool_free(g_session_mp);
g_mbuf_mp = NULL;
g_session_mp = NULL;
if (g_session_mp_priv != NULL) {
/* g_session_mp_priv may or may not be set depending on the DPDK version */
rte_mempool_free(g_session_mp_priv);
}
}
static void
test_initdrivers(void)
{
@@ -718,7 +740,6 @@ test_initdrivers(void)
static struct rte_mempool *orig_session_mp;
static struct rte_mempool *orig_session_mp_priv;
/* These tests will alloc and free our g_mbuf_mp
* so save that off here and restore it after each test is over.
*/
@@ -773,7 +794,7 @@
/* Test crypto dev configure failure. */
MOCK_SET(rte_cryptodev_device_count_by_driver, 2);
MOCK_SET(rte_cryptodev_info_get, 1);
MOCK_SET(rte_cryptodev_info_get, MOCK_INFO_GET_1QP_AESNI);
MOCK_SET(rte_cryptodev_configure, -1);
MOCK_CLEARED_ASSERT(spdk_mempool_create);
rc = vbdev_crypto_init_crypto_drivers();
@@ -803,18 +824,28 @@ test_initdrivers(void)
CU_ASSERT(g_session_mp_priv == NULL);
MOCK_SET(rte_cryptodev_start, 0);
/* Test happy path. */
/* Test bogus PMD */
MOCK_CLEARED_ASSERT(spdk_mempool_create);
MOCK_SET(rte_cryptodev_info_get, MOCK_INFO_GET_1QP_BOGUS_PMD);
rc = vbdev_crypto_init_crypto_drivers();
CU_ASSERT(g_mbuf_mp == NULL);
CU_ASSERT(g_session_mp == NULL);
CU_ASSERT(rc == -EINVAL);
/* Test happy path QAT. */
MOCK_CLEARED_ASSERT(spdk_mempool_create);
MOCK_SET(rte_cryptodev_info_get, MOCK_INFO_GET_1QP_QAT);
rc = vbdev_crypto_init_crypto_drivers();
/* We don't have spdk_mempool_create mocked right now, so make sure to free the mempools. */
CU_ASSERT(g_mbuf_mp != NULL);
CU_ASSERT(g_session_mp != NULL);
spdk_mempool_free(g_mbuf_mp);
rte_mempool_free(g_session_mp);
if (g_session_mp_priv != NULL) {
/* g_session_mp_priv may or may not be set depending on the DPDK version */
rte_mempool_free(g_session_mp_priv);
}
init_cleanup();
CU_ASSERT(rc == 0);
/* Test happy path AESNI. */
MOCK_CLEARED_ASSERT(spdk_mempool_create);
MOCK_SET(rte_cryptodev_info_get, MOCK_INFO_GET_1QP_AESNI);
rc = vbdev_crypto_init_crypto_drivers();
init_cleanup();
CU_ASSERT(rc == 0);
/* restore our initial values. */
@@ -951,6 +982,88 @@ test_poller(void)
CU_ASSERT(rc == 2);
}
/* Helper function for test_assign_device_qp() */
static void
_clear_device_qp_lists(void)
{
struct device_qp *device_qp = NULL;
while (!TAILQ_EMPTY(&g_device_qp_qat)) {
device_qp = TAILQ_FIRST(&g_device_qp_qat);
TAILQ_REMOVE(&g_device_qp_qat, device_qp, link);
free(device_qp);
}
CU_ASSERT(TAILQ_EMPTY(&g_device_qp_qat) == true);
while (!TAILQ_EMPTY(&g_device_qp_aesni_mb)) {
device_qp = TAILQ_FIRST(&g_device_qp_aesni_mb);
TAILQ_REMOVE(&g_device_qp_aesni_mb, device_qp, link);
free(device_qp);
}
CU_ASSERT(TAILQ_EMPTY(&g_device_qp_aesni_mb) == true);
}
/* Helper function for test_assign_device_qp() */
static void
_check_expected_values(struct vbdev_crypto *crypto_bdev, struct device_qp *device_qp,
struct crypto_io_channel *crypto_ch, uint8_t expected_index,
uint8_t current_index)
{
_assign_device_qp(&g_crypto_bdev, device_qp, g_crypto_ch);
CU_ASSERT(g_crypto_ch->device_qp->index == expected_index);
CU_ASSERT(g_next_qat_index == current_index);
}
static void
test_assign_device_qp(void)
{
struct device_qp *device_qp = NULL;
int i;
/* start with a known state, clear the device/qp lists */
_clear_device_qp_lists();
/* make sure that one AESNI_MB qp is found */
device_qp = calloc(1, sizeof(struct device_qp));
TAILQ_INSERT_TAIL(&g_device_qp_aesni_mb, device_qp, link);
g_crypto_ch->device_qp = NULL;
g_crypto_bdev.drv_name = AESNI_MB;
_assign_device_qp(&g_crypto_bdev, device_qp, g_crypto_ch);
CU_ASSERT(g_crypto_ch->device_qp != NULL);
/* QAT testing is more complex as the code under test load balances by
 * assigning each subsequent device/qp an index QAT_VF_SPREAD apart,
 * modulo g_qat_total_qp. For the current latest QAT we'll have 48 virtual
 * functions, each with 2 qps, so the "spread" between assignments is 32.
 */
g_qat_total_qp = 96;
for (i = 0; i < g_qat_total_qp; i++) {
device_qp = calloc(1, sizeof(struct device_qp));
device_qp->index = i;
TAILQ_INSERT_TAIL(&g_device_qp_qat, device_qp, link);
}
g_crypto_ch->device_qp = NULL;
g_crypto_bdev.drv_name = QAT;
/* First assignment will assign to 0 and next at 32. */
_check_expected_values(&g_crypto_bdev, device_qp, g_crypto_ch,
0, QAT_VF_SPREAD);
/* Second assignment will assign to 32 and next at 64. */
_check_expected_values(&g_crypto_bdev, device_qp, g_crypto_ch,
QAT_VF_SPREAD, QAT_VF_SPREAD * 2);
/* Third assignment will assign to 64 and next at 0. */
_check_expected_values(&g_crypto_bdev, device_qp, g_crypto_ch,
QAT_VF_SPREAD * 2, 0);
/* Fourth assignment will assign to 1 and next at 33. */
_check_expected_values(&g_crypto_bdev, device_qp, g_crypto_ch,
1, QAT_VF_SPREAD + 1);
_clear_device_qp_lists();
}
int
main(int argc, char **argv)
{
@@ -990,7 +1103,9 @@
CU_add_test(suite, "test_reset",
test_reset) == NULL ||
CU_add_test(suite, "test_poller",
test_poller) == NULL
test_poller) == NULL ||
CU_add_test(suite, "test_assign_device_qp",
test_assign_device_qp) == NULL
) {
CU_cleanup_registry();
return CU_get_error();