/* SPDX-License-Identifier: BSD-3-Clause * Copyright 2020 Mellanox Technologies, Ltd */ #include #include #include #include #include #include #include #include "mlx5.h" #include "mlx5_flow.h" #include "mlx5_hws_cnt.h" #define MLX5_ASO_CNT_QUEUE_LOG_DESC 14 /** * Free MR resources. * * @param[in] cdev * Pointer to the mlx5 common device. * @param[in] mr * MR to free. */ static void mlx5_aso_dereg_mr(struct mlx5_common_device *cdev, struct mlx5_pmd_mr *mr) { void *addr = mr->addr; cdev->mr_scache.dereg_mr_cb(mr); mlx5_free(addr); memset(mr, 0, sizeof(*mr)); } /** * Register Memory Region. * * @param[in] cdev * Pointer to the mlx5 common device. * @param[in] length * Size of MR buffer. * @param[in/out] mr * Pointer to MR to create. * * @return * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int mlx5_aso_reg_mr(struct mlx5_common_device *cdev, size_t length, struct mlx5_pmd_mr *mr) { int ret; mr->addr = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, length, 4096, SOCKET_ID_ANY); if (!mr->addr) { DRV_LOG(ERR, "Failed to create ASO bits mem for MR."); return -1; } ret = cdev->mr_scache.reg_mr_cb(cdev->pd, mr->addr, length, mr); if (ret) { DRV_LOG(ERR, "Failed to create direct Mkey."); mlx5_free(mr->addr); return -1; } return 0; } /** * Destroy Send Queue used for ASO access. * * @param[in] sq * ASO SQ to destroy. */ static void mlx5_aso_destroy_sq(struct mlx5_aso_sq *sq) { mlx5_devx_sq_destroy(&sq->sq_obj); mlx5_devx_cq_destroy(&sq->cq.cq_obj); memset(sq, 0, sizeof(*sq)); } /** * Initialize Send Queue used for ASO access counter. * * @param[in] sq * ASO SQ to initialize. */ static void mlx5_aso_cnt_init_sq(struct mlx5_aso_sq *sq) { volatile struct mlx5_aso_wqe *restrict wqe; int i; int size = 1 << sq->log_desc_n; /* All the next fields state should stay constant. */ for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) { wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) | (sizeof(*wqe) >> 4)); wqe->aso_cseg.operand_masks = rte_cpu_to_be_32 (0u | (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) | (ASO_OP_ALWAYS_FALSE << ASO_CSEG_COND_1_OPER_OFFSET) | (ASO_OP_ALWAYS_FALSE << ASO_CSEG_COND_0_OPER_OFFSET) | (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET)); wqe->aso_cseg.data_mask = RTE_BE64(UINT64_MAX); } } /** * Initialize Send Queue used for ASO access. * * @param[in] sq * ASO SQ to initialize. */ static void mlx5_aso_age_init_sq(struct mlx5_aso_sq *sq) { volatile struct mlx5_aso_wqe *restrict wqe; int i; int size = 1 << sq->log_desc_n; uint64_t addr; /* All the next fields state should stay constant. */ for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) { wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) | (sizeof(*wqe) >> 4)); wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.lkey); addr = (uint64_t)((uint64_t *)sq->mr.addr + i * MLX5_ASO_AGE_ACTIONS_PER_POOL / 64); wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32)); wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u); wqe->aso_cseg.operand_masks = rte_cpu_to_be_32 (0u | (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) | (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) | (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) | (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET)); wqe->aso_cseg.data_mask = RTE_BE64(UINT64_MAX); } } /** * Initialize Send Queue used for ASO flow meter access. * * @param[in] sq * ASO SQ to initialize. 
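 *
 * All WQE fields that never change between posts are pre-filled here, so the
 * enqueue path only patches the per-request words. As an illustrative sketch
 * (not extra driver code), the constant sq_ds dword combines the SQ number
 * with the data-segment count of the two-WQEBB ASO WQE (128 bytes, hence a
 * DS count of 8):
 *
 * @code
 * uint32_t ds = sizeof(struct mlx5_aso_wqe) >> 4;  // 16B units, 8 for 128B
 * uint32_t sq_ds = (sq->sqn << 8) | ds;            // host order, swapped with
 *                                                  // rte_cpu_to_be_32() below
 * @endcode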
 */
static void
mlx5_aso_mtr_init_sq(struct mlx5_aso_sq *sq)
{
	volatile struct mlx5_aso_wqe *restrict wqe;
	int i;
	int size = 1 << sq->log_desc_n;

	/* All the next fields state should stay constant. */
	for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
		wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
							   (sizeof(*wqe) >> 4));
		wqe->aso_cseg.operand_masks = RTE_BE32(0u |
			(ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
			(ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
			(ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
			(BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
		wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
						   MLX5_COMP_MODE_OFFSET);
	}
}

/**
 * Initialize Send Queue used for ASO connection tracking.
 *
 * @param[in] sq
 *   ASO SQ to initialize.
 */
static void
mlx5_aso_ct_init_sq(struct mlx5_aso_sq *sq)
{
	volatile struct mlx5_aso_wqe *restrict wqe;
	int i;
	int size = 1 << sq->log_desc_n;
	uint64_t addr;

	/* All the next fields state should stay constant. */
	for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
		wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
							   (sizeof(*wqe) >> 4));
		/* One unique MR for the query data. */
		wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.lkey);
		/* Magic number 64 represents the length of an ASO CT object. */
		addr = (uint64_t)((uintptr_t)sq->mr.addr + i * 64);
		wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
		wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
		/*
		 * The values of operand_masks are different for modify
		 * and query.
		 * And data_mask may be different for each modification. In
		 * query, it could be zero and ignored.
		 * CQE generation is always needed, in order to decide when
		 * it is available to create the flow or read the data.
		 */
		wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
						   MLX5_COMP_MODE_OFFSET);
	}
}

/**
 * Create Send Queue used for ASO access.
 *
 * @param[in] cdev
 *   Pointer to the mlx5 common device.
 * @param[in/out] sq
 *   Pointer to SQ to create.
 * @param[in] uar
 *   User Access Region object.
 * @param[in] log_desc_n
 *   Log base 2 of the number of SQ descriptors.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
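 *
 * Illustrative usage, mirroring the callers later in this file (a sketch
 * only; the local variable is hypothetical):
 *
 * @code
 * struct mlx5_aso_sq sq = { 0 };
 *
 * if (mlx5_aso_sq_create(cdev, &sq, sh->tx_uar.obj,
 *                        MLX5_ASO_QUEUE_LOG_DESC))
 *         return -1;
 * // Pre-fill the constant WQE fields for the intended ASO flavor.
 * mlx5_aso_mtr_init_sq(&sq);
 * @endcode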
*/ static int mlx5_aso_sq_create(struct mlx5_common_device *cdev, struct mlx5_aso_sq *sq, void *uar, uint16_t log_desc_n) { struct mlx5_devx_cq_attr cq_attr = { .uar_page_id = mlx5_os_get_devx_uar_page_id(uar), }; struct mlx5_devx_create_sq_attr sq_attr = { .user_index = 0xFFFF, .wq_attr = (struct mlx5_devx_wq_attr){ .pd = cdev->pdn, .uar_page = mlx5_os_get_devx_uar_page_id(uar), }, .ts_format = mlx5_ts_format_conv(cdev->config.hca_attr.sq_ts_format), }; struct mlx5_devx_modify_sq_attr modify_attr = { .state = MLX5_SQC_STATE_RDY, }; uint16_t log_wqbb_n; int ret; if (mlx5_devx_cq_create(cdev->ctx, &sq->cq.cq_obj, log_desc_n, &cq_attr, SOCKET_ID_ANY)) goto error; sq->cq.cq_ci = 0; sq->cq.log_desc_n = log_desc_n; sq->log_desc_n = log_desc_n; sq_attr.cqn = sq->cq.cq_obj.cq->id; /* for mlx5_aso_wqe that is twice the size of mlx5_wqe */ log_wqbb_n = sq->log_desc_n + 1; ret = mlx5_devx_sq_create(cdev->ctx, &sq->sq_obj, log_wqbb_n, &sq_attr, SOCKET_ID_ANY); if (ret) { DRV_LOG(ERR, "Can't create SQ object."); rte_errno = ENOMEM; goto error; } ret = mlx5_devx_cmd_modify_sq(sq->sq_obj.sq, &modify_attr); if (ret) { DRV_LOG(ERR, "Can't change SQ state to ready."); rte_errno = ENOMEM; goto error; } sq->pi = 0; sq->head = 0; sq->tail = 0; sq->sqn = sq->sq_obj.sq->id; rte_spinlock_init(&sq->sqsl); return 0; error: mlx5_aso_destroy_sq(sq); return -1; } void mlx5_aso_mtr_queue_uninit(struct mlx5_dev_ctx_shared *sh __rte_unused, struct mlx5_aso_mtr_pool *hws_pool, struct mlx5_aso_mtr_pools_mng *pool_mng) { uint32_t i; if (hws_pool) { for (i = 0; i < hws_pool->nb_sq; i++) mlx5_aso_destroy_sq(hws_pool->sq + i); mlx5_free(hws_pool->sq); return; } if (pool_mng) mlx5_aso_destroy_sq(&pool_mng->sq); } int mlx5_aso_mtr_queue_init(struct mlx5_dev_ctx_shared *sh, struct mlx5_aso_mtr_pool *hws_pool, struct mlx5_aso_mtr_pools_mng *pool_mng, uint32_t nb_queues) { struct mlx5_common_device *cdev = sh->cdev; struct mlx5_aso_sq *sq; uint32_t i; if (hws_pool) { sq = mlx5_malloc(MLX5_MEM_ZERO, sizeof(struct mlx5_aso_sq) * nb_queues, RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); if (!sq) return -1; hws_pool->sq = sq; for (i = 0; i < nb_queues; i++) { if (mlx5_aso_sq_create(cdev, hws_pool->sq + i, sh->tx_uar.obj, MLX5_ASO_QUEUE_LOG_DESC)) goto error; mlx5_aso_mtr_init_sq(hws_pool->sq + i); } hws_pool->nb_sq = nb_queues; } if (pool_mng) { if (mlx5_aso_sq_create(cdev, &pool_mng->sq, sh->tx_uar.obj, MLX5_ASO_QUEUE_LOG_DESC)) return -1; mlx5_aso_mtr_init_sq(&pool_mng->sq); } return 0; error: do { mlx5_aso_destroy_sq(hws_pool->sq + i); } while (i--); return -1; } /** * API to create and initialize Send Queue used for ASO access. * * @param[in] sh * Pointer to shared device context. * @param[in] aso_opc_mod * Mode of ASO feature. * @param[in] nb_queues * Number of Send Queues to create. * * @return * 0 on success, a negative errno value otherwise and rte_errno is set. 
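 *
 * A minimal usage sketch (not a verbatim caller): the flow-hit (aging) mode
 * keeps a single SQ driven by the polling alarm, so nb_queues is only
 * honored for the policer mode:
 *
 * @code
 * if (mlx5_aso_queue_init(sh, ASO_OPC_MOD_FLOW_HIT, 1))
 *         return -1;
 * // Later, start the periodic poller that drives the aging SQ:
 * if (mlx5_aso_flow_hit_queue_poll_start(sh))
 *         return -rte_errno;
 * @endcode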
*/ int mlx5_aso_queue_init(struct mlx5_dev_ctx_shared *sh, enum mlx5_access_aso_opc_mod aso_opc_mod, uint32_t nb_queues) { uint32_t sq_desc_n = 1 << MLX5_ASO_QUEUE_LOG_DESC; struct mlx5_common_device *cdev = sh->cdev; switch (aso_opc_mod) { case ASO_OPC_MOD_FLOW_HIT: if (mlx5_aso_reg_mr(cdev, (MLX5_ASO_AGE_ACTIONS_PER_POOL / 8) * sq_desc_n, &sh->aso_age_mng->aso_sq.mr)) return -1; if (mlx5_aso_sq_create(cdev, &sh->aso_age_mng->aso_sq, sh->tx_uar.obj, MLX5_ASO_QUEUE_LOG_DESC)) { mlx5_aso_dereg_mr(cdev, &sh->aso_age_mng->aso_sq.mr); return -1; } mlx5_aso_age_init_sq(&sh->aso_age_mng->aso_sq); break; case ASO_OPC_MOD_POLICER: if (mlx5_aso_mtr_queue_init(sh, NULL, &sh->mtrmng->pools_mng, nb_queues)) return -1; break; case ASO_OPC_MOD_CONNECTION_TRACKING: if (mlx5_aso_ct_queue_init(sh, sh->ct_mng, MLX5_ASO_CT_SQ_NUM)) return -1; break; default: DRV_LOG(ERR, "Unknown ASO operation mode"); return -1; } return 0; } /** * API to destroy Send Queue used for ASO access. * * @param[in] sh * Pointer to shared device context. * @param[in] aso_opc_mod * Mode of ASO feature. */ void mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh, enum mlx5_access_aso_opc_mod aso_opc_mod) { struct mlx5_aso_sq *sq = NULL; switch (aso_opc_mod) { case ASO_OPC_MOD_FLOW_HIT: mlx5_aso_dereg_mr(sh->cdev, &sh->aso_age_mng->aso_sq.mr); sq = &sh->aso_age_mng->aso_sq; break; case ASO_OPC_MOD_POLICER: mlx5_aso_mtr_queue_uninit(sh, NULL, &sh->mtrmng->pools_mng); break; case ASO_OPC_MOD_CONNECTION_TRACKING: mlx5_aso_ct_queue_uninit(sh, sh->ct_mng); break; default: DRV_LOG(ERR, "Unknown ASO operation mode"); return; } if (sq) mlx5_aso_destroy_sq(sq); } /** * Write a burst of WQEs to ASO SQ. * * @param[in] sh * Pointer to shared device context. * @param[in] n * Index of the last valid pool. * * @return * Number of WQEs in burst. */ static uint16_t mlx5_aso_sq_enqueue_burst(struct mlx5_dev_ctx_shared *sh, uint16_t n) { struct mlx5_aso_age_mng *mng = sh->aso_age_mng; volatile struct mlx5_aso_wqe *wqe; struct mlx5_aso_sq *sq = &mng->aso_sq; struct mlx5_aso_age_pool *pool; uint16_t size = 1 << sq->log_desc_n; uint16_t mask = size - 1; uint16_t max; uint16_t start_head = sq->head; max = RTE_MIN(size - (uint16_t)(sq->head - sq->tail), n - sq->next); if (unlikely(!max)) return 0; sq->elts[start_head & mask].burst_size = max; do { wqe = &sq->sq_obj.aso_wqes[sq->head & mask]; rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]); /* Fill next WQE. */ rte_rwlock_read_lock(&mng->resize_rwl); pool = mng->pools[sq->next]; rte_rwlock_read_unlock(&mng->resize_rwl); sq->elts[sq->head & mask].pool = pool; wqe->general_cseg.misc = rte_cpu_to_be_32(((struct mlx5_devx_obj *) (pool->flow_hit_aso_obj))->id); wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << MLX5_COMP_MODE_OFFSET); wqe->general_cseg.opcode = rte_cpu_to_be_32 (MLX5_OPCODE_ACCESS_ASO | (ASO_OPC_MOD_FLOW_HIT << WQE_CSEG_OPC_MOD_OFFSET) | (sq->pi << WQE_CSEG_WQE_INDEX_OFFSET)); sq->pi += 2; /* Each WQE contains 2 WQEBB's. */ sq->head++; sq->next++; max--; } while (max); wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET); mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe, sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR], !sh->tx_uar.dbnc); return sq->elts[start_head & mask].burst_size; } /** * Debug utility function. Dump contents of error CQE and WQE. * * @param[in] cqe * Error CQE to dump. * @param[in] wqe * Error WQE to dump. 
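 *
 * When reading the dump, the leading dwords of the WQE are the control
 * segment filled by the enqueue routines above: MLX5_OPCODE_ACCESS_ASO
 * combined with the ASO opcode modifier at WQE_CSEG_OPC_MOD_OFFSET and the
 * producer index at WQE_CSEG_WQE_INDEX_OFFSET, followed by the sq_ds dword
 * holding (sqn << 8) | ds.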
*/ static void mlx5_aso_dump_err_objs(volatile uint32_t *cqe, volatile uint32_t *wqe) { int i; DRV_LOG(ERR, "Error cqe:"); for (i = 0; i < 16; i += 4) DRV_LOG(ERR, "%08X %08X %08X %08X", cqe[i], cqe[i + 1], cqe[i + 2], cqe[i + 3]); DRV_LOG(ERR, "\nError wqe:"); for (i = 0; i < (int)sizeof(struct mlx5_aso_wqe) / 4; i += 4) DRV_LOG(ERR, "%08X %08X %08X %08X", wqe[i], wqe[i + 1], wqe[i + 2], wqe[i + 3]); } /** * Handle case of error CQE. * * @param[in] sq * ASO SQ to use. */ static void mlx5_aso_cqe_err_handle(struct mlx5_aso_sq *sq) { struct mlx5_aso_cq *cq = &sq->cq; uint32_t idx = cq->cq_ci & ((1 << cq->log_desc_n) - 1); volatile struct mlx5_err_cqe *cqe = (volatile struct mlx5_err_cqe *)&cq->cq_obj.cqes[idx]; cq->errors++; idx = rte_be_to_cpu_16(cqe->wqe_counter) & (1u << sq->log_desc_n); mlx5_aso_dump_err_objs((volatile uint32_t *)cqe, (volatile uint32_t *)&sq->sq_obj.aso_wqes[idx]); } int mlx5_aso_pull_completion(struct mlx5_aso_sq *sq, struct rte_flow_op_result res[], uint16_t n_res) { struct mlx5_aso_cq *cq = &sq->cq; volatile struct mlx5_cqe *restrict cqe; const uint32_t cq_size = 1 << cq->log_desc_n; const uint32_t mask = cq_size - 1; uint32_t idx; uint32_t next_idx; uint16_t max; uint16_t n = 0; int ret; max = (uint16_t)(sq->head - sq->tail); if (unlikely(!max || !n_res)) return 0; next_idx = cq->cq_ci & mask; do { idx = next_idx; next_idx = (cq->cq_ci + 1) & mask; /* Need to confirm the position of the prefetch. */ rte_prefetch0(&cq->cq_obj.cqes[next_idx]); cqe = &cq->cq_obj.cqes[idx]; ret = check_cqe(cqe, cq_size, cq->cq_ci); /* * Be sure owner read is done before any other cookie field or * opaque field. */ rte_io_rmb(); if (ret == MLX5_CQE_STATUS_HW_OWN) break; res[n].user_data = sq->elts[(uint16_t)((sq->tail + n) & mask)].user_data; if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { mlx5_aso_cqe_err_handle(sq); res[n].status = RTE_FLOW_OP_ERROR; } else { res[n].status = RTE_FLOW_OP_SUCCESS; } cq->cq_ci++; if (++n == n_res) break; } while (1); if (likely(n)) { sq->tail += n; rte_io_wmb(); cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci); } return n; } void mlx5_aso_push_wqe(struct mlx5_dev_ctx_shared *sh, struct mlx5_aso_sq *sq) { if (sq->db_pi == sq->pi) return; mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)sq->db, sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR], !sh->tx_uar.dbnc); sq->db_pi = sq->pi; } /** * Update ASO objects upon completion. * * @param[in] sh * Shared device context. * @param[in] n * Number of completed ASO objects. 
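 *
 * Each completed WQE delivers a 64-byte hit bitmask covering one pool of
 * MLX5_ASO_AGE_ACTIONS_PER_POOL flows. A sketch of the per-flow lookup the
 * loop below performs (illustration only; names match the code):
 *
 * @code
 * const uint8_t *bits = (const uint8_t *)addr; // 64B window of this WQE
 * uint8_t byte = 63 - (j / 8);                 // bytes are laid out in reverse
 * uint8_t offset = j % 8;
 * uint8_t hit = (bits[byte] >> offset) & 0x1;  // 1: flow j was hit, reset age
 * @endcode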
*/ static void mlx5_aso_age_action_update(struct mlx5_dev_ctx_shared *sh, uint16_t n) { struct mlx5_aso_age_mng *mng = sh->aso_age_mng; struct mlx5_aso_sq *sq = &mng->aso_sq; struct mlx5_age_info *age_info; const uint16_t size = 1 << sq->log_desc_n; const uint16_t mask = size - 1; const uint64_t curr = MLX5_CURR_TIME_SEC; uint16_t expected = AGE_CANDIDATE; uint16_t i; for (i = 0; i < n; ++i) { uint16_t idx = (sq->tail + i) & mask; struct mlx5_aso_age_pool *pool = sq->elts[idx].pool; uint64_t diff = curr - pool->time_of_last_age_check; uint64_t *addr = sq->mr.addr; int j; addr += idx * MLX5_ASO_AGE_ACTIONS_PER_POOL / 64; pool->time_of_last_age_check = curr; for (j = 0; j < MLX5_ASO_AGE_ACTIONS_PER_POOL; j++) { struct mlx5_aso_age_action *act = &pool->actions[j]; struct mlx5_age_param *ap = &act->age_params; uint8_t byte; uint8_t offset; uint8_t *u8addr; uint8_t hit; if (__atomic_load_n(&ap->state, __ATOMIC_RELAXED) != AGE_CANDIDATE) continue; byte = 63 - (j / 8); offset = j % 8; u8addr = (uint8_t *)addr; hit = (u8addr[byte] >> offset) & 0x1; if (hit) { __atomic_store_n(&ap->sec_since_last_hit, 0, __ATOMIC_RELAXED); } else { struct mlx5_priv *priv; __atomic_fetch_add(&ap->sec_since_last_hit, diff, __ATOMIC_RELAXED); /* If timeout passed add to aged-out list. */ if (ap->sec_since_last_hit <= ap->timeout) continue; priv = rte_eth_devices[ap->port_id].data->dev_private; age_info = GET_PORT_AGE_INFO(priv); rte_spinlock_lock(&age_info->aged_sl); if (__atomic_compare_exchange_n(&ap->state, &expected, AGE_TMOUT, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) { LIST_INSERT_HEAD(&age_info->aged_aso, act, next); MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW); } rte_spinlock_unlock(&age_info->aged_sl); } } } mlx5_age_event_prepare(sh); } /** * Handle completions from WQEs sent to ASO SQ. * * @param[in] sh * Shared device context. * * @return * Number of CQEs handled. */ static uint16_t mlx5_aso_completion_handle(struct mlx5_dev_ctx_shared *sh) { struct mlx5_aso_age_mng *mng = sh->aso_age_mng; struct mlx5_aso_sq *sq = &mng->aso_sq; struct mlx5_aso_cq *cq = &sq->cq; volatile struct mlx5_cqe *restrict cqe; const unsigned int cq_size = 1 << cq->log_desc_n; const unsigned int mask = cq_size - 1; uint32_t idx; uint32_t next_idx = cq->cq_ci & mask; const uint16_t max = (uint16_t)(sq->head - sq->tail); uint16_t i = 0; int ret; if (unlikely(!max)) return 0; do { idx = next_idx; next_idx = (cq->cq_ci + 1) & mask; rte_prefetch0(&cq->cq_obj.cqes[next_idx]); cqe = &cq->cq_obj.cqes[idx]; ret = check_cqe(cqe, cq_size, cq->cq_ci); /* * Be sure owner read is done before any other cookie field or * opaque field. */ rte_io_rmb(); if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { if (likely(ret == MLX5_CQE_STATUS_HW_OWN)) break; mlx5_aso_cqe_err_handle(sq); } else { i += sq->elts[(sq->tail + i) & mask].burst_size; } cq->cq_ci++; } while (1); if (likely(i)) { mlx5_aso_age_action_update(sh, i); sq->tail += i; rte_io_wmb(); cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci); } return i; } /** * Periodically read CQEs and send WQEs to ASO SQ. * * @param[in] arg * Shared device context containing the ASO SQ. */ static void mlx5_flow_aso_alarm(void *arg) { struct mlx5_dev_ctx_shared *sh = arg; struct mlx5_aso_sq *sq = &sh->aso_age_mng->aso_sq; uint32_t us = 100u; uint16_t n; rte_rwlock_read_lock(&sh->aso_age_mng->resize_rwl); n = sh->aso_age_mng->next; rte_rwlock_read_unlock(&sh->aso_age_mng->resize_rwl); mlx5_aso_completion_handle(sh); if (sq->next == n) { /* End of loop: wait 1 second. 
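 * Once every pool registered so far has been visited (sq->next reached the
 * snapshot of mng->next taken under resize_rwl), restart from pool 0 and
 * slow the alarm down from the 100us cadence to one full second.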
*/ us = US_PER_S; sq->next = 0; } mlx5_aso_sq_enqueue_burst(sh, n); if (rte_eal_alarm_set(us, mlx5_flow_aso_alarm, sh)) DRV_LOG(ERR, "Cannot reinitialize aso alarm."); } /** * API to start ASO access using ASO SQ. * * @param[in] sh * Pointer to shared device context. * * @return * 0 on success, a negative errno value otherwise and rte_errno is set. */ int mlx5_aso_flow_hit_queue_poll_start(struct mlx5_dev_ctx_shared *sh) { if (rte_eal_alarm_set(US_PER_S, mlx5_flow_aso_alarm, sh)) { DRV_LOG(ERR, "Cannot reinitialize ASO age alarm."); return -rte_errno; } return 0; } /** * API to stop ASO access using ASO SQ. * * @param[in] sh * Pointer to shared device context. * * @return * 0 on success, a negative errno value otherwise and rte_errno is set. */ int mlx5_aso_flow_hit_queue_poll_stop(struct mlx5_dev_ctx_shared *sh) { int retries = 1024; if (!sh->aso_age_mng->aso_sq.sq_obj.sq) return -EINVAL; rte_errno = 0; while (--retries) { rte_eal_alarm_cancel(mlx5_flow_aso_alarm, sh); if (rte_errno != EINPROGRESS) break; rte_pause(); } return -rte_errno; } static uint16_t mlx5_aso_mtr_sq_enqueue_single(struct mlx5_dev_ctx_shared *sh, struct mlx5_aso_sq *sq, struct mlx5_aso_mtr *aso_mtr, struct mlx5_mtr_bulk *bulk, bool need_lock, void *user_data, bool push) { volatile struct mlx5_aso_wqe *wqe = NULL; struct mlx5_flow_meter_info *fm = NULL; struct mlx5_flow_meter_profile *fmp; uint16_t size = 1 << sq->log_desc_n; uint16_t mask = size - 1; uint16_t res; uint32_t dseg_idx = 0; struct mlx5_aso_mtr_pool *pool = NULL; uint32_t param_le; int id; if (need_lock) rte_spinlock_lock(&sq->sqsl); res = size - (uint16_t)(sq->head - sq->tail); if (unlikely(!res)) { DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send"); if (need_lock) rte_spinlock_unlock(&sq->sqsl); return 0; } wqe = &sq->sq_obj.aso_wqes[sq->head & mask]; rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]); /* Fill next WQE. */ fm = &aso_mtr->fm; sq->elts[sq->head & mask].mtr = user_data ? user_data : aso_mtr; if (aso_mtr->type == ASO_METER_INDIRECT) { if (likely(sh->config.dv_flow_en == 2)) pool = aso_mtr->pool; else pool = container_of(aso_mtr, struct mlx5_aso_mtr_pool, mtrs[aso_mtr->offset]); id = pool->devx_obj->id; } else { id = bulk->devx_obj->id; } wqe->general_cseg.misc = rte_cpu_to_be_32(id + (aso_mtr->offset >> 1)); wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO | (ASO_OPC_MOD_POLICER << WQE_CSEG_OPC_MOD_OFFSET) | sq->pi << WQE_CSEG_WQE_INDEX_OFFSET); /* There are 2 meters in one ASO cache line. */ dseg_idx = aso_mtr->offset & 0x1; wqe->aso_cseg.data_mask = RTE_BE64(MLX5_IFC_FLOW_METER_PARAM_MASK << (32 * !dseg_idx)); if (fm->is_enable) { wqe->aso_dseg.mtrs[dseg_idx].cbs_cir = fm->profile->srtcm_prm.cbs_cir; wqe->aso_dseg.mtrs[dseg_idx].ebs_eir = fm->profile->srtcm_prm.ebs_eir; } else { wqe->aso_dseg.mtrs[dseg_idx].cbs_cir = RTE_BE32(MLX5_IFC_FLOW_METER_DISABLE_CBS_CIR_VAL); wqe->aso_dseg.mtrs[dseg_idx].ebs_eir = 0; } fmp = fm->profile; param_le = (1 << ASO_DSEG_VALID_OFFSET); if (fm->color_aware) param_le |= (MLX5_FLOW_COLOR_UNDEFINED << ASO_DSEG_SC_OFFSET); else param_le |= (MLX5_FLOW_COLOR_GREEN << ASO_DSEG_SC_OFFSET); if (fmp->profile.packet_mode) param_le |= (MLX5_METER_MODE_PKT << ASO_DSEG_MTR_MODE); wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm = RTE_BE32(param_le); switch (fmp->profile.alg) { case RTE_MTR_SRTCM_RFC2697: /* Only needed for RFC2697. 
*/ if (fm->profile->srtcm_prm.ebs_eir) wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |= RTE_BE32(1 << ASO_DSEG_BO_OFFSET); break; case RTE_MTR_TRTCM_RFC2698: wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |= RTE_BE32(1 << ASO_DSEG_BBOG_OFFSET); break; case RTE_MTR_TRTCM_RFC4115: wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |= RTE_BE32(1 << ASO_DSEG_BO_OFFSET); break; default: break; } /* * Note: * Due to software performance reason, the token fields will not be * set when posting the WQE to ASO SQ. It will be filled by the HW * automatically. */ sq->head++; sq->pi += 2;/* Each WQE contains 2 WQEBB's. */ if (push) { mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe, sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR], !sh->tx_uar.dbnc); sq->db_pi = sq->pi; } sq->db = wqe; if (need_lock) rte_spinlock_unlock(&sq->sqsl); return 1; } static void mlx5_aso_mtrs_status_update(struct mlx5_aso_sq *sq, uint16_t aso_mtrs_nums) { uint16_t size = 1 << sq->log_desc_n; uint16_t mask = size - 1; uint16_t i; struct mlx5_aso_mtr *aso_mtr = NULL; uint8_t exp_state = ASO_METER_WAIT; for (i = 0; i < aso_mtrs_nums; ++i) { aso_mtr = sq->elts[(sq->tail + i) & mask].mtr; MLX5_ASSERT(aso_mtr); (void)__atomic_compare_exchange_n(&aso_mtr->state, &exp_state, ASO_METER_READY, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED); } } static void mlx5_aso_mtr_completion_handle(struct mlx5_aso_sq *sq, bool need_lock) { struct mlx5_aso_cq *cq = &sq->cq; volatile struct mlx5_cqe *restrict cqe; const unsigned int cq_size = 1 << cq->log_desc_n; const unsigned int mask = cq_size - 1; uint32_t idx; uint32_t next_idx = cq->cq_ci & mask; uint16_t max; uint16_t n = 0; int ret; if (need_lock) rte_spinlock_lock(&sq->sqsl); max = (uint16_t)(sq->head - sq->tail); if (unlikely(!max)) { rte_spinlock_unlock(&sq->sqsl); return; } do { idx = next_idx; next_idx = (cq->cq_ci + 1) & mask; rte_prefetch0(&cq->cq_obj.cqes[next_idx]); cqe = &cq->cq_obj.cqes[idx]; ret = check_cqe(cqe, cq_size, cq->cq_ci); /* * Be sure owner read is done before any other cookie field or * opaque field. */ rte_io_rmb(); if (ret != MLX5_CQE_STATUS_SW_OWN) { if (likely(ret == MLX5_CQE_STATUS_HW_OWN)) break; mlx5_aso_cqe_err_handle(sq); } else { n++; } cq->cq_ci++; } while (1); if (likely(n)) { mlx5_aso_mtrs_status_update(sq, n); sq->tail += n; rte_io_wmb(); cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci); } if (need_lock) rte_spinlock_unlock(&sq->sqsl); } /** * Update meter parameter by send WQE. * * @param[in] dev * Pointer to Ethernet device. * @param[in] priv * Pointer to mlx5 private data structure. * @param[in] fm * Pointer to flow meter to be modified. * * @return * 0 on success, a negative errno value otherwise and rte_errno is set. */ int mlx5_aso_meter_update_by_wqe(struct mlx5_dev_ctx_shared *sh, uint32_t queue, struct mlx5_aso_mtr *mtr, struct mlx5_mtr_bulk *bulk, void *user_data, bool push) { struct mlx5_aso_sq *sq; uint32_t poll_wqe_times = MLX5_MTR_POLL_WQE_CQE_TIMES; bool need_lock; int ret; if (likely(sh->config.dv_flow_en == 2) && mtr->type == ASO_METER_INDIRECT) { if (queue == MLX5_HW_INV_QUEUE) { sq = &mtr->pool->sq[mtr->pool->nb_sq - 1]; need_lock = true; } else { sq = &mtr->pool->sq[queue]; need_lock = false; } } else { sq = &sh->mtrmng->pools_mng.sq; need_lock = true; } if (queue != MLX5_HW_INV_QUEUE) { ret = mlx5_aso_mtr_sq_enqueue_single(sh, sq, mtr, bulk, need_lock, user_data, push); return ret > 0 ? 
0 : -1; } do { mlx5_aso_mtr_completion_handle(sq, need_lock); if (mlx5_aso_mtr_sq_enqueue_single(sh, sq, mtr, bulk, need_lock, NULL, true)) return 0; /* Waiting for wqe resource. */ rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY); } while (--poll_wqe_times); DRV_LOG(ERR, "Fail to send WQE for ASO meter offset %d", mtr->offset); return -1; } /** * Wait for meter to be ready. * * @param[in] dev * Pointer to Ethernet device. * @param[in] priv * Pointer to mlx5 private data structure. * @param[in] fm * Pointer to flow meter to be modified. * * @return * 0 on success, a negative errno value otherwise and rte_errno is set. */ int mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh, uint32_t queue, struct mlx5_aso_mtr *mtr) { struct mlx5_aso_sq *sq; uint32_t poll_cqe_times = MLX5_MTR_POLL_WQE_CQE_TIMES; uint8_t state; bool need_lock; if (likely(sh->config.dv_flow_en == 2) && mtr->type == ASO_METER_INDIRECT) { if (queue == MLX5_HW_INV_QUEUE) { sq = &mtr->pool->sq[mtr->pool->nb_sq - 1]; need_lock = true; } else { sq = &mtr->pool->sq[queue]; need_lock = false; } } else { sq = &sh->mtrmng->pools_mng.sq; need_lock = true; } state = __atomic_load_n(&mtr->state, __ATOMIC_RELAXED); if (state == ASO_METER_READY || state == ASO_METER_WAIT_ASYNC) return 0; do { mlx5_aso_mtr_completion_handle(sq, need_lock); if (__atomic_load_n(&mtr->state, __ATOMIC_RELAXED) == ASO_METER_READY) return 0; /* Waiting for CQE ready. */ rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY); } while (--poll_cqe_times); DRV_LOG(ERR, "Fail to poll CQE ready for ASO meter offset %d", mtr->offset); return -1; } static inline struct mlx5_aso_sq* __mlx5_aso_ct_get_sq_in_hws(uint32_t queue, struct mlx5_aso_ct_pool *pool) { return (queue == MLX5_HW_INV_QUEUE) ? pool->shared_sq : &pool->sq[queue]; } static inline struct mlx5_aso_sq* __mlx5_aso_ct_get_sq_in_sws(struct mlx5_dev_ctx_shared *sh, struct mlx5_aso_ct_action *ct) { return &sh->ct_mng->aso_sqs[ct->offset & (MLX5_ASO_CT_SQ_NUM - 1)]; } static inline struct mlx5_aso_ct_pool* __mlx5_aso_ct_get_pool(struct mlx5_dev_ctx_shared *sh, struct mlx5_aso_ct_action *ct) { if (likely(sh->config.dv_flow_en == 2)) return ct->pool; return container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]); } int mlx5_aso_ct_queue_uninit(struct mlx5_dev_ctx_shared *sh, struct mlx5_aso_ct_pools_mng *ct_mng) { uint32_t i; /* 64B per object for query. */ for (i = 0; i < ct_mng->nb_sq; i++) { if (ct_mng->aso_sqs[i].mr.addr) mlx5_aso_dereg_mr(sh->cdev, &ct_mng->aso_sqs[i].mr); mlx5_aso_destroy_sq(&ct_mng->aso_sqs[i]); } return 0; } /** * API to create and initialize CT Send Queue used for ASO access. * * @param[in] sh * Pointer to shared device context. * @param[in] ct_mng * Pointer to the CT management struct. * *param[in] nb_queues * Number of queues to be allocated. * * @return * 0 on success, a negative errno value otherwise and rte_errno is set. */ int mlx5_aso_ct_queue_init(struct mlx5_dev_ctx_shared *sh, struct mlx5_aso_ct_pools_mng *ct_mng, uint32_t nb_queues) { uint32_t i; /* 64B per object for query. 
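 * Hence each SQ below gets a query MR of 64 * (1 << MLX5_ASO_QUEUE_LOG_DESC)
 * bytes: one 64-byte landing slot per WQE slot, addressed by the WQE index
 * at post time and copied out to the caller on completion.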
*/ for (i = 0; i < nb_queues; i++) { if (mlx5_aso_reg_mr(sh->cdev, 64 * (1 << MLX5_ASO_QUEUE_LOG_DESC), &ct_mng->aso_sqs[i].mr)) goto error; if (mlx5_aso_sq_create(sh->cdev, &ct_mng->aso_sqs[i], sh->tx_uar.obj, MLX5_ASO_QUEUE_LOG_DESC)) goto error; mlx5_aso_ct_init_sq(&ct_mng->aso_sqs[i]); } ct_mng->nb_sq = nb_queues; return 0; error: do { if (ct_mng->aso_sqs[i].mr.addr) mlx5_aso_dereg_mr(sh->cdev, &ct_mng->aso_sqs[i].mr); mlx5_aso_destroy_sq(&ct_mng->aso_sqs[i]); } while (i--); ct_mng->nb_sq = 0; return -1; } /* * Post a WQE to the ASO CT SQ to modify the context. * * @param[in] sh * Pointer to shared device context. * @param[in] ct * Pointer to the generic CT structure related to the context. * @param[in] profile * Pointer to configuration profile. * * @return * 1 on success (WQE number), 0 on failure. */ static uint16_t mlx5_aso_ct_sq_enqueue_single(struct mlx5_dev_ctx_shared *sh, struct mlx5_aso_sq *sq, struct mlx5_aso_ct_action *ct, const struct rte_flow_action_conntrack *profile, bool need_lock, void *user_data, bool push) { volatile struct mlx5_aso_wqe *wqe = NULL; uint16_t size = 1 << sq->log_desc_n; uint16_t mask = size - 1; uint16_t res; struct mlx5_aso_ct_pool *pool; void *desg; void *orig_dir; void *reply_dir; if (need_lock) rte_spinlock_lock(&sq->sqsl); /* Prevent other threads to update the index. */ res = size - (uint16_t)(sq->head - sq->tail); if (unlikely(!res)) { if (need_lock) rte_spinlock_unlock(&sq->sqsl); DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send"); return 0; } wqe = &sq->sq_obj.aso_wqes[sq->head & mask]; rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]); /* Fill next WQE. */ MLX5_ASO_CT_UPDATE_STATE(ct, user_data ? ASO_CONNTRACK_WAIT_ASYNC : ASO_CONNTRACK_WAIT); if (user_data) { sq->elts[sq->head & mask].user_data = user_data; } else { sq->elts[sq->head & mask].ct = ct; sq->elts[sq->head & mask].query_data = NULL; } pool = __mlx5_aso_ct_get_pool(sh, ct); /* Each WQE will have a single CT object. */ wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id + ct->offset); wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO | (ASO_OPC_MOD_CONNECTION_TRACKING << WQE_CSEG_OPC_MOD_OFFSET) | sq->pi << WQE_CSEG_WQE_INDEX_OFFSET); wqe->aso_cseg.operand_masks = rte_cpu_to_be_32 (0u | (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) | (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) | (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) | (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET)); wqe->aso_cseg.data_mask = UINT64_MAX; /* To make compiler happy. 
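 * The cast through uintptr_t below only drops the volatile qualifier from
 * the WQE data area so the MLX5_SET() helpers can operate on it; the
 * address itself is unchanged.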
*/ desg = (void *)(uintptr_t)wqe->aso_dseg.data; MLX5_SET(conn_track_aso, desg, valid, 1); MLX5_SET(conn_track_aso, desg, state, profile->state); MLX5_SET(conn_track_aso, desg, freeze_track, !profile->enable); MLX5_SET(conn_track_aso, desg, connection_assured, profile->live_connection); MLX5_SET(conn_track_aso, desg, sack_permitted, profile->selective_ack); MLX5_SET(conn_track_aso, desg, challenged_acked, profile->challenge_ack_passed); /* Heartbeat, retransmission_counter, retranmission_limit_exceeded: 0 */ MLX5_SET(conn_track_aso, desg, heartbeat, 0); MLX5_SET(conn_track_aso, desg, max_ack_window, profile->max_ack_window); MLX5_SET(conn_track_aso, desg, retransmission_counter, 0); MLX5_SET(conn_track_aso, desg, retranmission_limit_exceeded, 0); MLX5_SET(conn_track_aso, desg, retranmission_limit, profile->retransmission_limit); MLX5_SET(conn_track_aso, desg, reply_direction_tcp_scale, profile->reply_dir.scale); MLX5_SET(conn_track_aso, desg, reply_direction_tcp_close_initiated, profile->reply_dir.close_initiated); /* Both directions will use the same liberal mode. */ MLX5_SET(conn_track_aso, desg, reply_direction_tcp_liberal_enabled, profile->liberal_mode); MLX5_SET(conn_track_aso, desg, reply_direction_tcp_data_unacked, profile->reply_dir.data_unacked); MLX5_SET(conn_track_aso, desg, reply_direction_tcp_max_ack, profile->reply_dir.last_ack_seen); MLX5_SET(conn_track_aso, desg, original_direction_tcp_scale, profile->original_dir.scale); MLX5_SET(conn_track_aso, desg, original_direction_tcp_close_initiated, profile->original_dir.close_initiated); MLX5_SET(conn_track_aso, desg, original_direction_tcp_liberal_enabled, profile->liberal_mode); MLX5_SET(conn_track_aso, desg, original_direction_tcp_data_unacked, profile->original_dir.data_unacked); MLX5_SET(conn_track_aso, desg, original_direction_tcp_max_ack, profile->original_dir.last_ack_seen); MLX5_SET(conn_track_aso, desg, last_win, profile->last_window); MLX5_SET(conn_track_aso, desg, last_dir, profile->last_direction); MLX5_SET(conn_track_aso, desg, last_index, profile->last_index); MLX5_SET(conn_track_aso, desg, last_seq, profile->last_seq); MLX5_SET(conn_track_aso, desg, last_ack, profile->last_ack); MLX5_SET(conn_track_aso, desg, last_end, profile->last_end); orig_dir = MLX5_ADDR_OF(conn_track_aso, desg, original_dir); MLX5_SET(tcp_window_params, orig_dir, sent_end, profile->original_dir.sent_end); MLX5_SET(tcp_window_params, orig_dir, reply_end, profile->original_dir.reply_end); MLX5_SET(tcp_window_params, orig_dir, max_win, profile->original_dir.max_win); MLX5_SET(tcp_window_params, orig_dir, max_ack, profile->original_dir.max_ack); reply_dir = MLX5_ADDR_OF(conn_track_aso, desg, reply_dir); MLX5_SET(tcp_window_params, reply_dir, sent_end, profile->reply_dir.sent_end); MLX5_SET(tcp_window_params, reply_dir, reply_end, profile->reply_dir.reply_end); MLX5_SET(tcp_window_params, reply_dir, max_win, profile->reply_dir.max_win); MLX5_SET(tcp_window_params, reply_dir, max_ack, profile->reply_dir.max_ack); sq->head++; sq->pi += 2; /* Each WQE contains 2 WQEBB's. */ if (push) { mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe, sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR], !sh->tx_uar.dbnc); sq->db_pi = sq->pi; } sq->db = wqe; if (need_lock) rte_spinlock_unlock(&sq->sqsl); return 1; } /* * Update the status field of CTs to indicate ready to be used by flows. * A continuous number of CTs since last update. * * @param[in] sq * Pointer to ASO CT SQ. * @param[in] num * Number of CT structures to be updated. 
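 *
 * For elements that were posted as queries, the 64-byte context written by
 * the device into the SQ's MR is copied back to the caller, e.g. (sketch of
 * the loop below):
 *
 * @code
 * if (sq->elts[idx].query_data)
 *         rte_memcpy(sq->elts[idx].query_data,
 *                    (char *)((uintptr_t)sq->mr.addr + idx * 64), 64);
 * @endcode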
* * @return * 0 on success, a negative value. */ static void mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num) { uint16_t size = 1 << sq->log_desc_n; uint16_t mask = size - 1; uint16_t i; struct mlx5_aso_ct_action *ct = NULL; uint16_t idx; for (i = 0; i < num; i++) { idx = (uint16_t)((sq->tail + i) & mask); ct = sq->elts[idx].ct; MLX5_ASSERT(ct); MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_READY); if (sq->elts[idx].query_data) rte_memcpy(sq->elts[idx].query_data, (char *)((uintptr_t)sq->mr.addr + idx * 64), 64); } } /* * Post a WQE to the ASO CT SQ to query the current context. * * @param[in] sh * Pointer to shared device context. * @param[in] ct * Pointer to the generic CT structure related to the context. * @param[in] data * Pointer to data area to be filled. * * @return * 1 on success (WQE number), 0 on failure. */ static int mlx5_aso_ct_sq_query_single(struct mlx5_dev_ctx_shared *sh, struct mlx5_aso_sq *sq, struct mlx5_aso_ct_action *ct, char *data, bool need_lock, void *user_data, bool push) { volatile struct mlx5_aso_wqe *wqe = NULL; uint16_t size = 1 << sq->log_desc_n; uint16_t mask = size - 1; uint16_t res; uint16_t wqe_idx; struct mlx5_aso_ct_pool *pool; enum mlx5_aso_ct_state state = __atomic_load_n(&ct->state, __ATOMIC_RELAXED); if (state == ASO_CONNTRACK_FREE) { DRV_LOG(ERR, "Fail: No context to query"); return -1; } else if (state == ASO_CONNTRACK_WAIT) { return 0; } if (need_lock) rte_spinlock_lock(&sq->sqsl); res = size - (uint16_t)(sq->head - sq->tail); if (unlikely(!res)) { if (need_lock) rte_spinlock_unlock(&sq->sqsl); DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send"); return 0; } MLX5_ASO_CT_UPDATE_STATE(ct, user_data ? ASO_CONNTRACK_WAIT_ASYNC : ASO_CONNTRACK_QUERY); wqe = &sq->sq_obj.aso_wqes[sq->head & mask]; /* Confirm the location and address of the prefetch instruction. */ rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]); /* Fill next WQE. */ wqe_idx = sq->head & mask; /* Check if this is async mode. */ if (user_data) { struct mlx5_hw_q_job *job = (struct mlx5_hw_q_job *)user_data; sq->elts[wqe_idx].ct = user_data; job->out_data = (char *)((uintptr_t)sq->mr.addr + wqe_idx * 64); } else { sq->elts[wqe_idx].query_data = data; sq->elts[wqe_idx].ct = ct; } pool = __mlx5_aso_ct_get_pool(sh, ct); /* Each WQE will have a single CT object. */ wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id + ct->offset); wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO | (ASO_OPC_MOD_CONNECTION_TRACKING << WQE_CSEG_OPC_MOD_OFFSET) | sq->pi << WQE_CSEG_WQE_INDEX_OFFSET); /* * There is no write request is required. * ASO_OPER_LOGICAL_AND and ASO_OP_ALWAYS_FALSE are both 0. * "BYTEWISE_64BYTE" is needed for a whole context. * Set to 0 directly to reduce an endian swap. (Modify should rewrite.) * "data_mask" is ignored. * Buffer address was already filled during initialization. */ wqe->aso_cseg.operand_masks = rte_cpu_to_be_32(BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET); wqe->aso_cseg.data_mask = 0; sq->head++; /* * Each WQE contains 2 WQEBB's, even though * data segment is not used in this case. */ sq->pi += 2; if (push) { mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe, sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR], !sh->tx_uar.dbnc); sq->db_pi = sq->pi; } sq->db = wqe; if (need_lock) rte_spinlock_unlock(&sq->sqsl); return 1; } /* * Handle completions from WQEs sent to ASO CT. * * @param[in] mng * Pointer to the CT pools management structure. 
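 *
 * Completions are reaped with the owner-bit polling idiom used throughout
 * this file (sketch only, equivalent to the loop body below):
 *
 * @code
 * ret = check_cqe(cqe, cq_size, cq->cq_ci);
 * rte_io_rmb();                        // order owner check before payload reads
 * if (ret == MLX5_CQE_STATUS_HW_OWN)
 *         break;                       // nothing more to reap
 * if (ret != MLX5_CQE_STATUS_SW_OWN)
 *         mlx5_aso_cqe_err_handle(sq); // error CQE: dump and count it
 * @endcode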
*/ static void mlx5_aso_ct_completion_handle(struct mlx5_dev_ctx_shared *sh __rte_unused, struct mlx5_aso_sq *sq, bool need_lock) { struct mlx5_aso_cq *cq = &sq->cq; volatile struct mlx5_cqe *restrict cqe; const uint32_t cq_size = 1 << cq->log_desc_n; const uint32_t mask = cq_size - 1; uint32_t idx; uint32_t next_idx; uint16_t max; uint16_t n = 0; int ret; if (need_lock) rte_spinlock_lock(&sq->sqsl); max = (uint16_t)(sq->head - sq->tail); if (unlikely(!max)) { if (need_lock) rte_spinlock_unlock(&sq->sqsl); return; } next_idx = cq->cq_ci & mask; do { idx = next_idx; next_idx = (cq->cq_ci + 1) & mask; /* Need to confirm the position of the prefetch. */ rte_prefetch0(&cq->cq_obj.cqes[next_idx]); cqe = &cq->cq_obj.cqes[idx]; ret = check_cqe(cqe, cq_size, cq->cq_ci); /* * Be sure owner read is done before any other cookie field or * opaque field. */ rte_io_rmb(); if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { if (likely(ret == MLX5_CQE_STATUS_HW_OWN)) break; mlx5_aso_cqe_err_handle(sq); } else { n++; } cq->cq_ci++; } while (1); if (likely(n)) { mlx5_aso_ct_status_update(sq, n); sq->tail += n; rte_io_wmb(); cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci); } if (need_lock) rte_spinlock_unlock(&sq->sqsl); } /* * Update connection tracking ASO context by sending WQE. * * @param[in] sh * Pointer to mlx5_dev_ctx_shared object. * @param[in] queue * The queue index. * @param[in] ct * Pointer to connection tracking offload object. * @param[in] profile * Pointer to connection tracking TCP parameter. * * @return * 0 on success, -1 on failure. */ int mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh, uint32_t queue, struct mlx5_aso_ct_action *ct, const struct rte_flow_action_conntrack *profile, void *user_data, bool push) { uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES; struct mlx5_aso_ct_pool *pool = __mlx5_aso_ct_get_pool(sh, ct); struct mlx5_aso_sq *sq; bool need_lock = !!(queue == MLX5_HW_INV_QUEUE); int ret; if (sh->config.dv_flow_en == 2) sq = __mlx5_aso_ct_get_sq_in_hws(queue, pool); else sq = __mlx5_aso_ct_get_sq_in_sws(sh, ct); if (queue != MLX5_HW_INV_QUEUE) { ret = mlx5_aso_ct_sq_enqueue_single(sh, sq, ct, profile, need_lock, user_data, push); return ret > 0 ? 0 : -1; } do { mlx5_aso_ct_completion_handle(sh, sq, need_lock); if (mlx5_aso_ct_sq_enqueue_single(sh, sq, ct, profile, need_lock, NULL, true)) return 0; /* Waiting for wqe resource. */ rte_delay_us_sleep(10u); } while (--poll_wqe_times); DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d", ct->offset, pool->index); return -1; } /* * The routine is used to wait for WQE completion to continue with queried data. * * @param[in] sh * Pointer to mlx5_dev_ctx_shared object. * @param[in] queue * The queue which CT works on.. * @param[in] ct * Pointer to connection tracking offload object. * * @return * 0 on success, -1 on failure. 
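 *
 * Typical synchronous sequence (an illustrative sketch, not a verbatim
 * caller): post the modification on the shared queue, then block until the
 * context leaves the WAIT state before using it in a flow rule:
 *
 * @code
 * if (mlx5_aso_ct_update_by_wqe(sh, MLX5_HW_INV_QUEUE, ct, profile,
 *                               NULL, true))
 *         return -1;                  // no WQE slot after all retries
 * if (mlx5_aso_ct_wait_ready(sh, MLX5_HW_INV_QUEUE, ct))
 *         return -1;                  // CQE never arrived
 * @endcode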
*/ int mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh, uint32_t queue, struct mlx5_aso_ct_action *ct) { uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES; struct mlx5_aso_ct_pool *pool = __mlx5_aso_ct_get_pool(sh, ct); struct mlx5_aso_sq *sq; bool need_lock = !!(queue == MLX5_HW_INV_QUEUE); if (sh->config.dv_flow_en == 2) sq = __mlx5_aso_ct_get_sq_in_hws(queue, pool); else sq = __mlx5_aso_ct_get_sq_in_sws(sh, ct); if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) == ASO_CONNTRACK_READY) return 0; do { mlx5_aso_ct_completion_handle(sh, sq, need_lock); if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) == ASO_CONNTRACK_READY) return 0; /* Waiting for CQE ready, consider should block or sleep. */ rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY); } while (--poll_cqe_times); DRV_LOG(ERR, "Fail to poll CQE for ASO CT %d in pool %d", ct->offset, pool->index); return -1; } /* * Convert the hardware conntrack data format into the profile. * * @param[in] profile * Pointer to conntrack profile to be filled after query. * @param[in] wdata * Pointer to data fetched from hardware. */ void mlx5_aso_ct_obj_analyze(struct rte_flow_action_conntrack *profile, char *wdata) { void *o_dir = MLX5_ADDR_OF(conn_track_aso, wdata, original_dir); void *r_dir = MLX5_ADDR_OF(conn_track_aso, wdata, reply_dir); /* MLX5_GET16 should be taken into consideration. */ profile->state = (enum rte_flow_conntrack_state) MLX5_GET(conn_track_aso, wdata, state); profile->enable = !MLX5_GET(conn_track_aso, wdata, freeze_track); profile->selective_ack = MLX5_GET(conn_track_aso, wdata, sack_permitted); profile->live_connection = MLX5_GET(conn_track_aso, wdata, connection_assured); profile->challenge_ack_passed = MLX5_GET(conn_track_aso, wdata, challenged_acked); profile->max_ack_window = MLX5_GET(conn_track_aso, wdata, max_ack_window); profile->retransmission_limit = MLX5_GET(conn_track_aso, wdata, retranmission_limit); profile->last_window = MLX5_GET(conn_track_aso, wdata, last_win); profile->last_direction = MLX5_GET(conn_track_aso, wdata, last_dir); profile->last_index = (enum rte_flow_conntrack_tcp_last_index) MLX5_GET(conn_track_aso, wdata, last_index); profile->last_seq = MLX5_GET(conn_track_aso, wdata, last_seq); profile->last_ack = MLX5_GET(conn_track_aso, wdata, last_ack); profile->last_end = MLX5_GET(conn_track_aso, wdata, last_end); profile->liberal_mode = MLX5_GET(conn_track_aso, wdata, reply_direction_tcp_liberal_enabled) | MLX5_GET(conn_track_aso, wdata, original_direction_tcp_liberal_enabled); /* No liberal in the RTE structure profile. 
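 * rte_flow_action_conntrack has a single liberal_mode flag while the
 * hardware context keeps one liberal bit per direction, so the two bits are
 * OR-ed on query here and the same value is written to both directions in
 * mlx5_aso_ct_sq_enqueue_single().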
*/ profile->reply_dir.scale = MLX5_GET(conn_track_aso, wdata, reply_direction_tcp_scale); profile->reply_dir.close_initiated = MLX5_GET(conn_track_aso, wdata, reply_direction_tcp_close_initiated); profile->reply_dir.data_unacked = MLX5_GET(conn_track_aso, wdata, reply_direction_tcp_data_unacked); profile->reply_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata, reply_direction_tcp_max_ack); profile->reply_dir.sent_end = MLX5_GET(tcp_window_params, r_dir, sent_end); profile->reply_dir.reply_end = MLX5_GET(tcp_window_params, r_dir, reply_end); profile->reply_dir.max_win = MLX5_GET(tcp_window_params, r_dir, max_win); profile->reply_dir.max_ack = MLX5_GET(tcp_window_params, r_dir, max_ack); profile->original_dir.scale = MLX5_GET(conn_track_aso, wdata, original_direction_tcp_scale); profile->original_dir.close_initiated = MLX5_GET(conn_track_aso, wdata, original_direction_tcp_close_initiated); profile->original_dir.data_unacked = MLX5_GET(conn_track_aso, wdata, original_direction_tcp_data_unacked); profile->original_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata, original_direction_tcp_max_ack); profile->original_dir.sent_end = MLX5_GET(tcp_window_params, o_dir, sent_end); profile->original_dir.reply_end = MLX5_GET(tcp_window_params, o_dir, reply_end); profile->original_dir.max_win = MLX5_GET(tcp_window_params, o_dir, max_win); profile->original_dir.max_ack = MLX5_GET(tcp_window_params, o_dir, max_ack); } /* * Query connection tracking information parameter by send WQE. * * @param[in] dev * Pointer to Ethernet device. * @param[in] ct * Pointer to connection tracking offload object. * @param[out] profile * Pointer to connection tracking TCP information. * * @return * 0 on success, -1 on failure. */ int mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh, uint32_t queue, struct mlx5_aso_ct_action *ct, struct rte_flow_action_conntrack *profile, void *user_data, bool push) { uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES; struct mlx5_aso_ct_pool *pool = __mlx5_aso_ct_get_pool(sh, ct); struct mlx5_aso_sq *sq; bool need_lock = !!(queue == MLX5_HW_INV_QUEUE); char out_data[64 * 2]; int ret; if (sh->config.dv_flow_en == 2) sq = __mlx5_aso_ct_get_sq_in_hws(queue, pool); else sq = __mlx5_aso_ct_get_sq_in_sws(sh, ct); if (queue != MLX5_HW_INV_QUEUE) { ret = mlx5_aso_ct_sq_query_single(sh, sq, ct, out_data, need_lock, user_data, push); return ret > 0 ? 0 : -1; } do { mlx5_aso_ct_completion_handle(sh, sq, need_lock); ret = mlx5_aso_ct_sq_query_single(sh, sq, ct, out_data, need_lock, NULL, true); if (ret < 0) return ret; else if (ret > 0) goto data_handle; /* Waiting for wqe resource or state. */ else rte_delay_us_sleep(10u); } while (--poll_wqe_times); DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d", ct->offset, pool->index); return -1; data_handle: ret = mlx5_aso_ct_wait_ready(sh, queue, ct); if (!ret) mlx5_aso_ct_obj_analyze(profile, out_data); return ret; } /* * Make sure the conntrack context is synchronized with hardware before * creating a flow rule that uses it. * * @param[in] sh * Pointer to shared device context. * @param[in] ct * Pointer to connection tracking offload object. * * @return * 0 on success, a negative errno value otherwise and rte_errno is set. 
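 *
 * Illustrative call site (a sketch; the real callers live in the flow
 * engine):
 *
 * @code
 * if (mlx5_aso_ct_available(sh, queue, ct))
 *         return -rte_errno;  // EBUSY (still in flight) or ENXIO (freed)
 * @endcode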
*/ int mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh, uint32_t queue, struct mlx5_aso_ct_action *ct) { struct mlx5_aso_ct_pool *pool = __mlx5_aso_ct_get_pool(sh, ct); struct mlx5_aso_sq *sq; bool need_lock = !!(queue == MLX5_HW_INV_QUEUE); uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES; enum mlx5_aso_ct_state state = __atomic_load_n(&ct->state, __ATOMIC_RELAXED); if (sh->config.dv_flow_en == 2) sq = __mlx5_aso_ct_get_sq_in_hws(queue, pool); else sq = __mlx5_aso_ct_get_sq_in_sws(sh, ct); if (state == ASO_CONNTRACK_FREE) { rte_errno = ENXIO; return -rte_errno; } else if (state == ASO_CONNTRACK_READY || state == ASO_CONNTRACK_QUERY || state == ASO_CONNTRACK_WAIT_ASYNC) { return 0; } do { mlx5_aso_ct_completion_handle(sh, sq, need_lock); state = __atomic_load_n(&ct->state, __ATOMIC_RELAXED); if (state == ASO_CONNTRACK_READY || state == ASO_CONNTRACK_QUERY) return 0; /* Waiting for CQE ready, consider should block or sleep. */ rte_delay_us_block(MLX5_ASO_WQE_CQE_RESPONSE_DELAY); } while (--poll_cqe_times); rte_errno = EBUSY; return -rte_errno; } int mlx5_aso_cnt_queue_init(struct mlx5_dev_ctx_shared *sh) { struct mlx5_hws_aso_mng *aso_mng = NULL; uint8_t idx; struct mlx5_aso_sq *sq; MLX5_ASSERT(sh); MLX5_ASSERT(sh->cnt_svc); aso_mng = &sh->cnt_svc->aso_mng; aso_mng->sq_num = HWS_CNT_ASO_SQ_NUM; for (idx = 0; idx < HWS_CNT_ASO_SQ_NUM; idx++) { sq = &aso_mng->sqs[idx]; if (mlx5_aso_sq_create(sh->cdev, sq, sh->tx_uar.obj, MLX5_ASO_CNT_QUEUE_LOG_DESC)) goto error; mlx5_aso_cnt_init_sq(sq); } return 0; error: mlx5_aso_cnt_queue_uninit(sh); return -1; } void mlx5_aso_cnt_queue_uninit(struct mlx5_dev_ctx_shared *sh) { uint16_t idx; for (idx = 0; idx < sh->cnt_svc->aso_mng.sq_num; idx++) mlx5_aso_destroy_sq(&sh->cnt_svc->aso_mng.sqs[idx]); sh->cnt_svc->aso_mng.sq_num = 0; } static uint16_t mlx5_aso_cnt_sq_enqueue_burst(struct mlx5_hws_cnt_pool *cpool, struct mlx5_dev_ctx_shared *sh, struct mlx5_aso_sq *sq, uint32_t n, uint32_t offset, uint32_t dcs_id_base) { volatile struct mlx5_aso_wqe *wqe; uint16_t size = 1 << sq->log_desc_n; uint16_t mask = size - 1; uint16_t max; uint32_t upper_offset = offset; uint64_t addr; uint32_t ctrl_gen_id = 0; uint8_t opcmod = sh->cdev->config.hca_attr.flow_access_aso_opc_mod; rte_be32_t lkey = rte_cpu_to_be_32(cpool->raw_mng->mr.lkey); uint16_t aso_n = (uint16_t)(RTE_ALIGN_CEIL(n, 4) / 4); uint32_t ccntid; max = RTE_MIN(size - (uint16_t)(sq->head - sq->tail), aso_n); if (unlikely(!max)) return 0; upper_offset += (max * 4); /* Because only one burst at one time, we can use the same elt. */ sq->elts[0].burst_size = max; ctrl_gen_id = dcs_id_base; ctrl_gen_id /= 4; do { ccntid = upper_offset - max * 4; wqe = &sq->sq_obj.aso_wqes[sq->head & mask]; rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]); wqe->general_cseg.misc = rte_cpu_to_be_32(ctrl_gen_id); wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << MLX5_COMP_MODE_OFFSET); wqe->general_cseg.opcode = rte_cpu_to_be_32 (MLX5_OPCODE_ACCESS_ASO | (opcmod << WQE_CSEG_OPC_MOD_OFFSET) | (sq->pi << WQE_CSEG_WQE_INDEX_OFFSET)); addr = (uint64_t)RTE_PTR_ADD(cpool->raw_mng->raw, ccntid * sizeof(struct flow_counter_stats)); wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32)); wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u); wqe->aso_cseg.lkey = lkey; sq->pi += 2; /* Each WQE contains 2 WQEBB's. 
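 * Each counter WQE also covers four counters, which is why the burst is
 * RTE_ALIGN_CEIL(n, 4) / 4 WQEs and why the statistics land at
 * cpool->raw_mng->raw + ccntid * sizeof(struct flow_counter_stats), with
 * ccntid stepping by four per WQE.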
*/ sq->head++; sq->next++; ctrl_gen_id++; max--; } while (max); wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET); mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe, sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR], !sh->tx_uar.dbnc); return sq->elts[0].burst_size; } static uint16_t mlx5_aso_cnt_completion_handle(struct mlx5_aso_sq *sq) { struct mlx5_aso_cq *cq = &sq->cq; volatile struct mlx5_cqe *restrict cqe; const unsigned int cq_size = 1 << cq->log_desc_n; const unsigned int mask = cq_size - 1; uint32_t idx; uint32_t next_idx = cq->cq_ci & mask; const uint16_t max = (uint16_t)(sq->head - sq->tail); uint16_t i = 0; int ret; if (unlikely(!max)) return 0; idx = next_idx; next_idx = (cq->cq_ci + 1) & mask; rte_prefetch0(&cq->cq_obj.cqes[next_idx]); cqe = &cq->cq_obj.cqes[idx]; ret = check_cqe(cqe, cq_size, cq->cq_ci); /* * Be sure owner read is done before any other cookie field or * opaque field. */ rte_io_rmb(); if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { if (likely(ret == MLX5_CQE_STATUS_HW_OWN)) return 0; /* return immediately. */ mlx5_aso_cqe_err_handle(sq); } i += sq->elts[0].burst_size; sq->elts[0].burst_size = 0; cq->cq_ci++; if (likely(i)) { sq->tail += i; rte_io_wmb(); cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci); } return i; } static uint16_t mlx5_aso_cnt_query_one_dcs(struct mlx5_dev_ctx_shared *sh, struct mlx5_hws_cnt_pool *cpool, uint8_t dcs_idx, uint32_t num) { uint32_t dcs_id = cpool->dcs_mng.dcs[dcs_idx].obj->id; uint64_t cnt_num = cpool->dcs_mng.dcs[dcs_idx].batch_sz; uint64_t left; uint32_t iidx = cpool->dcs_mng.dcs[dcs_idx].iidx; uint32_t offset; uint16_t mask; uint16_t sq_idx; uint64_t burst_sz = (uint64_t)(1 << MLX5_ASO_CNT_QUEUE_LOG_DESC) * 4 * sh->cnt_svc->aso_mng.sq_num; uint64_t qburst_sz = burst_sz / sh->cnt_svc->aso_mng.sq_num; uint64_t n; struct mlx5_aso_sq *sq; cnt_num = RTE_MIN(num, cnt_num); left = cnt_num; while (left) { mask = 0; for (sq_idx = 0; sq_idx < sh->cnt_svc->aso_mng.sq_num; sq_idx++) { if (left == 0) { mask |= (1 << sq_idx); continue; } n = RTE_MIN(left, qburst_sz); offset = cnt_num - left; offset += iidx; mlx5_aso_cnt_sq_enqueue_burst(cpool, sh, &sh->cnt_svc->aso_mng.sqs[sq_idx], n, offset, dcs_id); left -= n; } do { for (sq_idx = 0; sq_idx < sh->cnt_svc->aso_mng.sq_num; sq_idx++) { sq = &sh->cnt_svc->aso_mng.sqs[sq_idx]; if (mlx5_aso_cnt_completion_handle(sq)) mask |= (1 << sq_idx); } } while (mask < ((1 << sh->cnt_svc->aso_mng.sq_num) - 1)); } return cnt_num; } /* * Query FW counter via ASO WQE. * * ASO query counter use _sync_ mode, means: * 1. each SQ issue one burst with several WQEs * 2. ask for CQE at last WQE * 3. busy poll CQ of each SQ's * 4. If all SQ's CQE are received then goto step 1, issue next burst * * @param[in] sh * Pointer to shared device. * @param[in] cpool * Pointer to counter pool. * * @return * 0 on success, -1 on failure. */ int mlx5_aso_cnt_query(struct mlx5_dev_ctx_shared *sh, struct mlx5_hws_cnt_pool *cpool) { uint32_t idx; uint32_t num; uint32_t cnt_num = mlx5_hws_cnt_pool_get_size(cpool) - rte_ring_count(cpool->free_list); for (idx = 0; idx < cpool->dcs_mng.batch_total; idx++) { num = RTE_MIN(cnt_num, cpool->dcs_mng.dcs[idx].batch_sz); mlx5_aso_cnt_query_one_dcs(sh, cpool, idx, num); cnt_num -= num; if (cnt_num == 0) break; } return 0; }
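
/*
 * Illustrative usage of the synchronous counter query above (a sketch, not
 * part of the driver; cpool stands for the HWS counter pool owned by the
 * caller). All SQ/CQ batching across the ASO counter queues is handled
 * internally, so a background refresh loop only needs:
 *
 * @code
 * if (mlx5_aso_cnt_query(sh, cpool))
 *         DRV_LOG(ERR, "Failed to refresh HWS counters via ASO.");
 * @endcode
 */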