net/mana: implement hardware layer operations

The hardware layer of MANA understands the device queue and doorbell
formats. Those functions are implemented for use by packet RX/TX code.

Signed-off-by: Long Li <longli@microsoft.com>
This commit is contained in:
Long Li 2022-10-05 16:22:01 -07:00 committed by Ferruh Yigit
parent 0f5db3c68b
commit 56dd45c0ce
3 changed files with 495 additions and 0 deletions

303
drivers/net/mana/gdma.c Normal file
View File

@ -0,0 +1,303 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright 2022 Microsoft Corporation
*/
#include <ethdev_driver.h>
#include <rte_io.h>
#include "mana.h"
/*
 * Return the address inside the queue buffer that corresponds to the current
 * queue head, i.e. where the next WQE starts.
 *
 * The head counter is scaled by GDMA_WQE_ALIGNMENT_UNIT_SIZE to get a byte
 * offset and then masked with (size - 1); the mask only acts as a correct
 * wrap-around when the queue size is a power of two.
 */
uint8_t *
gdma_get_wqe_pointer(struct mana_gdma_queue *queue)
{
	uint8_t *base = (uint8_t *)queue->buffer;
	uint32_t wrap_mask = queue->size - 1;
	uint32_t byte_off;

	byte_off = (queue->head * GDMA_WQE_ALIGNMENT_UNIT_SIZE) & wrap_mask;

	DRV_LOG(DEBUG, "txq sq_head %u sq_size %u offset_in_bytes %u",
		queue->head, queue->size, byte_off);

	/* Diagnostic only: the pointer is still returned if the check trips. */
	if (queue->size < byte_off + GDMA_WQE_ALIGNMENT_UNIT_SIZE) {
		DRV_LOG(ERR, "fatal error: offset_in_bytes %u too big",
			byte_off);
	}

	return base + byte_off;
}
static uint32_t
write_dma_client_oob(uint8_t *work_queue_buffer_pointer,
const struct gdma_work_request *work_request,
uint32_t client_oob_size)
{
uint8_t *p = work_queue_buffer_pointer;
struct gdma_wqe_dma_oob *header = (struct gdma_wqe_dma_oob *)p;
memset(header, 0, sizeof(struct gdma_wqe_dma_oob));
header->num_sgl_entries = work_request->num_sgl_elements;
header->inline_client_oob_size_in_dwords =
client_oob_size / sizeof(uint32_t);
header->client_data_unit = work_request->client_data_unit;
DRV_LOG(DEBUG, "queue buf %p sgl %u oob_h %u du %u oob_buf %p oob_b %u",
work_queue_buffer_pointer, header->num_sgl_entries,
header->inline_client_oob_size_in_dwords,
header->client_data_unit, work_request->inline_oob_data,
work_request->inline_oob_size_in_bytes);
p += sizeof(struct gdma_wqe_dma_oob);
if (work_request->inline_oob_data &&
work_request->inline_oob_size_in_bytes > 0) {
memcpy(p, work_request->inline_oob_data,
work_request->inline_oob_size_in_bytes);
if (client_oob_size > work_request->inline_oob_size_in_bytes)
memset(p + work_request->inline_oob_size_in_bytes, 0,
client_oob_size -
work_request->inline_oob_size_in_bytes);
}
return sizeof(struct gdma_wqe_dma_oob) + client_oob_size;
}
/*
 * Copy the scatter-gather list of a work request into the work queue,
 * wrapping to the start of the queue buffer when the list crosses its end.
 *
 * @param work_queue_head_pointer
 *   First byte of the queue buffer (wrap-around target).
 * @param work_queue_end_pointer
 *   One past the last byte of the queue buffer.
 * @param work_queue_cur_pointer
 *   Position in the queue buffer where the SGL starts.
 * @param work_request
 *   Request whose sgl / num_sgl_elements describe the list.
 * @return
 *   Total number of SGL bytes written, including any part written after a
 *   wrap-around.
 */
static uint32_t
write_scatter_gather_list(uint8_t *work_queue_head_pointer,
			  uint8_t *work_queue_end_pointer,
			  uint8_t *work_queue_cur_pointer,
			  struct gdma_work_request *work_request)
{
	struct gdma_sgl_element *sge_list = work_request->sgl;
	struct gdma_sgl_element dummy_sgl[1];
	uint32_t num_sge = work_request->num_sgl_elements;
	uint32_t size_to_queue_end;
	uint32_t sge_list_size;
	uint32_t remaining;
	uint8_t *address;

	DRV_LOG(DEBUG, "work_queue_cur_pointer %p work_request->flags %x",
		work_queue_cur_pointer, work_request->flags);

	size_to_queue_end = (uint32_t)(work_queue_end_pointer -
				       work_queue_cur_pointer);

	if (num_sge == 0) {
		/* Per spec, the case of an empty SGL should be handled as
		 * follows to avoid corrupted WQE errors:
		 * Write one dummy SGL entry
		 * Set the address to 1, leave the rest as 0
		 */
		dummy_sgl[0].address = 1;
		dummy_sgl[0].size = 0;
		dummy_sgl[0].memory_key = 0;
		num_sge = 1;
		sge_list = dummy_sgl;
	}

	address = (uint8_t *)sge_list;
	sge_list_size = sizeof(struct gdma_sgl_element) * num_sge;
	remaining = sge_list_size;

	if (size_to_queue_end < remaining) {
		/* Fill the tail of the buffer, then continue at its start. */
		memcpy(work_queue_cur_pointer, address, size_to_queue_end);
		work_queue_cur_pointer = work_queue_head_pointer;
		address += size_to_queue_end;
		remaining -= size_to_queue_end;
	}
	memcpy(work_queue_cur_pointer, address, remaining);

	DRV_LOG(DEBUG, "sge %u address 0x%" PRIx64 " size %u key %u list_s %u",
		num_sge, sge_list->address, sge_list->size,
		sge_list->memory_key, sge_list_size);

	return sge_list_size;
}
/*
* Post a work request to queue.
*/
int
gdma_post_work_request(struct mana_gdma_queue *queue,
struct gdma_work_request *work_req,
struct gdma_posted_wqe_info *wqe_info)
{
uint32_t client_oob_size =
work_req->inline_oob_size_in_bytes >
INLINE_OOB_SMALL_SIZE_IN_BYTES ?
INLINE_OOB_LARGE_SIZE_IN_BYTES :
INLINE_OOB_SMALL_SIZE_IN_BYTES;
uint32_t sgl_data_size = sizeof(struct gdma_sgl_element) *
RTE_MAX((uint32_t)1, work_req->num_sgl_elements);
uint32_t wqe_size =
RTE_ALIGN(sizeof(struct gdma_wqe_dma_oob) +
client_oob_size + sgl_data_size,
GDMA_WQE_ALIGNMENT_UNIT_SIZE);
uint8_t *wq_buffer_pointer;
uint32_t queue_free_units = queue->count - (queue->head - queue->tail);
if (wqe_size / GDMA_WQE_ALIGNMENT_UNIT_SIZE > queue_free_units) {
DRV_LOG(DEBUG, "WQE size %u queue count %u head %u tail %u",
wqe_size, queue->count, queue->head, queue->tail);
return -EBUSY;
}
DRV_LOG(DEBUG, "client_oob_size %u sgl_data_size %u wqe_size %u",
client_oob_size, sgl_data_size, wqe_size);
if (wqe_info) {
wqe_info->wqe_index =
((queue->head * GDMA_WQE_ALIGNMENT_UNIT_SIZE) &
(queue->size - 1)) / GDMA_WQE_ALIGNMENT_UNIT_SIZE;
wqe_info->unmasked_queue_offset = queue->head;
wqe_info->wqe_size_in_bu =
wqe_size / GDMA_WQE_ALIGNMENT_UNIT_SIZE;
}
wq_buffer_pointer = gdma_get_wqe_pointer(queue);
wq_buffer_pointer += write_dma_client_oob(wq_buffer_pointer, work_req,
client_oob_size);
if (wq_buffer_pointer >= ((uint8_t *)queue->buffer) + queue->size)
wq_buffer_pointer -= queue->size;
write_scatter_gather_list((uint8_t *)queue->buffer,
(uint8_t *)queue->buffer + queue->size,
wq_buffer_pointer, work_req);
queue->head += wqe_size / GDMA_WQE_ALIGNMENT_UNIT_SIZE;
return 0;
}
/*
 * 64-bit doorbell value, seen either as a raw word or through one bitfield
 * layout per queue type. mana_ring_doorbell() fills the view matching the
 * queue type and writes as_uint64 to the doorbell page.
 * NOTE(review): the resulting bit positions depend on the compiler's bitfield
 * allocation order - confirm for every supported target.
 */
union gdma_doorbell_entry {
/* Raw value passed to rte_write64(). */
uint64_t as_uint64;
/* Completion queue view. */
struct {
uint64_t id : 24;
uint64_t reserved : 8;
uint64_t tail_ptr : 31;
uint64_t arm : 1;
} cq;
/* Receive queue view. */
struct {
uint64_t id : 24;
uint64_t wqe_cnt : 8;
uint64_t tail_ptr : 32;
} rq;
/* Send queue view. */
struct {
uint64_t id : 24;
uint64_t reserved : 8;
uint64_t tail_ptr : 32;
} sq;
/* Event queue view; not filled in by mana_ring_doorbell() in this file. */
struct {
uint64_t id : 16;
uint64_t reserved : 16;
uint64_t tail_ptr : 31;
uint64_t arm : 1;
} eq;
}; /* HW DATA */
/* Byte offsets added to the doorbell page address, one per queue type. */
enum {
	DOORBELL_OFFSET_SQ = 0x000,	/* send queue */
	DOORBELL_OFFSET_RQ = 0x400,	/* receive queue */
	DOORBELL_OFFSET_CQ = 0x800,	/* completion queue */
	DOORBELL_OFFSET_EQ = 0xFF8,	/* event queue */
};
/*
* Write to hardware doorbell to notify new activity.
*/
int
mana_ring_doorbell(void *db_page, enum gdma_queue_types queue_type,
uint32_t queue_id, uint32_t tail)
{
uint8_t *addr = db_page;
union gdma_doorbell_entry e = {};
switch (queue_type) {
case GDMA_QUEUE_SEND:
e.sq.id = queue_id;
e.sq.tail_ptr = tail;
addr += DOORBELL_OFFSET_SQ;
break;
case GDMA_QUEUE_RECEIVE:
e.rq.id = queue_id;
e.rq.tail_ptr = tail;
e.rq.wqe_cnt = 1;
addr += DOORBELL_OFFSET_RQ;
break;
case GDMA_QUEUE_COMPLETION:
e.cq.id = queue_id;
e.cq.tail_ptr = tail;
e.cq.arm = 1;
addr += DOORBELL_OFFSET_CQ;
break;
default:
DRV_LOG(ERR, "Unsupported queue type %d", queue_type);
return -1;
}
/* Ensure all writes are done before ringing doorbell */
rte_wmb();
DRV_LOG(DEBUG, "db_page %p addr %p queue_id %u type %u tail %u",
db_page, addr, queue_id, queue_type, tail);
rte_write64(e.as_uint64, addr);
return 0;
}
/*
 * Poll completion queue for completions.
 *
 * Looks at the entry addressed by the CQ head and compares its owner bits
 * with the value expected for the current pass over the ring (head / count,
 * masked) and with the value of the previous pass.
 *
 * Returns 1 when a completion was copied into comp and the head advanced,
 * 0 when the entry still carries the previous pass's owner bits (nothing
 * new), -1 when the owner bits match neither value (logged as CQ overflow).
 */
int
gdma_poll_completion_queue(struct mana_gdma_queue *cq, struct gdma_comp *comp)
{
	struct gdma_hardware_completion_entry *ring = cq->buffer;
	uint32_t slot = cq->head % cq->count;
	uint32_t pass = cq->head / cq->count;
	struct gdma_hardware_completion_entry *cqe = &ring[slot];
	uint32_t new_owner_bits = pass & COMPLETION_QUEUE_OWNER_MASK;
	uint32_t old_owner_bits = (pass - 1) & COMPLETION_QUEUE_OWNER_MASK;
	uint32_t cqe_owner_bits = cqe->owner_bits;

	DRV_LOG(DEBUG, "comp cqe bits 0x%x owner bits 0x%x",
		cqe_owner_bits, old_owner_bits);

	if (cqe_owner_bits != new_owner_bits) {
		/* No new entry */
		if (cqe_owner_bits == old_owner_bits)
			return 0;

		DRV_LOG(ERR, "CQ overflowed, ID %u cqe 0x%x new 0x%x",
			cq->id, cqe_owner_bits, new_owner_bits);
		return -1;
	}

	/* Ensure checking owner bits happens before reading from CQE */
	rte_rmb();

	comp->work_queue_number = cqe->wq_num;
	comp->send_work_queue = cqe->is_sq;
	memcpy(comp->completion_data, cqe->dma_client_data,
	       GDMA_COMP_DATA_SIZE);

	cq->head += 1;

	DRV_LOG(DEBUG, "comp new 0x%x old 0x%x cqe 0x%x wq %u sq %u head %u",
		new_owner_bits, old_owner_bits, cqe_owner_bits,
		comp->work_queue_number, comp->send_work_queue, cq->head);

	return 1;
}

View File

@ -44,6 +44,177 @@ struct mana_shared_data {
#define MAX_RECEIVE_BUFFERS_PER_QUEUE 256
#define MAX_SEND_BUFFERS_PER_QUEUE 256
/* Basic unit of a work queue in bytes: WQE sizes are rounded up to it and the
 * work queue head is counted in these units.
 */
#define GDMA_WQE_ALIGNMENT_UNIT_SIZE 32
/* NOTE(review): presumably the size of one completion queue entry; 64 equals
 * GDMA_COMP_DATA_SIZE (0x3C) plus the 4-byte owner word - confirm.
 */
#define COMP_ENTRY_SIZE 64
/* NOTE(review): 512 equals an 8-byte WQE header + 24-byte large OOB + 30 SGL
 * entries of 16 bytes; the RX value is not derivable from this hunk - confirm.
 */
#define MAX_TX_WQE_SIZE 512
#define MAX_RX_WQE_SIZE 256
/* Values from the GDMA specification document, WQE format description */
/* gdma_post_work_request() reserves the small size unless the request's
 * inline OOB is larger than it, in which case the large size is reserved.
 */
#define INLINE_OOB_SMALL_SIZE_IN_BYTES 8
#define INLINE_OOB_LARGE_SIZE_IN_BYTES 24
#define NOT_USING_CLIENT_DATA_UNIT 0
/* Kinds of GDMA queue. */
enum gdma_queue_types {
	GDMA_QUEUE_TYPE_INVALID = 0,
	GDMA_QUEUE_SEND = 1,
	GDMA_QUEUE_RECEIVE = 2,
	GDMA_QUEUE_COMPLETION = 3,
	GDMA_QUEUE_EVENT = 4,
	/* Room for expansion: more queue types can be added, but they are
	 * expected to be added contiguously after the last one above.
	 * Failing that will result in unexpected behavior.
	 */
	GDMA_QUEUE_TYPE_MAX = 16,
};
#define WORK_QUEUE_NUMBER_BASE_BITS 10

/* Common first member of the GDMA software structures. */
struct gdma_header {
	/* Size of the complete structure this header begins, counting every
	 * structure it is extended by. For illustration only (these names are
	 * not in the code): if GDMA_BASE_SPEC extends gdma_header and
	 * GDMA_EVENT_QUEUE_SPEC extends GDMA_BASE_SPEC, then struct_size of a
	 * GDMA_EVENT_QUEUE_SPEC is the size of GDMA_EVENT_QUEUE_SPEC, which
	 * already includes GDMA_BASE_SPEC and gdma_header.
	 */
	size_t struct_size;
};
/* The following macros are from GDMA SPEC 3.6, "Table 2: CQE data structure"
* and "Table 4: Event Queue Entry (EQE) data format"
*/
/* Bytes of client data at the start of a completion entry (60). */
#define GDMA_COMP_DATA_SIZE 0x3C /* Must be a multiple of 4 */
#define GDMA_COMP_DATA_SIZE_IN_UINT32 (GDMA_COMP_DATA_SIZE / 4)
/* Bit position and width of each field in the 32-bit word that follows the
 * client data; they agree with the bitfields of
 * struct gdma_hardware_completion_entry (24 + 1 + 4 reserved + 3).
 */
#define COMPLETION_QUEUE_ENTRY_WORK_QUEUE_INDEX 0
#define COMPLETION_QUEUE_ENTRY_WORK_QUEUE_SIZE 24
#define COMPLETION_QUEUE_ENTRY_SEND_WORK_QUEUE_INDEX 24
#define COMPLETION_QUEUE_ENTRY_SEND_WORK_QUEUE_SIZE 1
#define COMPLETION_QUEUE_ENTRY_OWNER_BITS_INDEX 29
#define COMPLETION_QUEUE_ENTRY_OWNER_BITS_SIZE 3
/* Mask covering the owner bits once shifted down to bit 0 (0x7). */
#define COMPLETION_QUEUE_OWNER_MASK \
((1 << (COMPLETION_QUEUE_ENTRY_OWNER_BITS_SIZE)) - 1)
/*
 * Software copy of one completion. The three data members are filled by
 * gdma_poll_completion_queue() from the hardware completion entry.
 */
struct gdma_comp {
	struct gdma_header gdma_header;

	/* Client data copied from the completion entry (GDMA core fills). */
	uint32_t completion_data[GDMA_COMP_DATA_SIZE_IN_UINT32];

	/* Work queue number reported by the entry (GDMA core fills). */
	uint32_t work_queue_number;

	/* Send-queue flag reported by the entry (GDMA core fills). */
	bool send_work_queue;
};
/*
 * Completion queue entry as laid out in the CQ buffer: GDMA_COMP_DATA_SIZE
 * bytes of client data followed by one 32-bit word holding the work queue
 * number, the send-queue flag and the owner bits.
 * NOTE(review): bit positions depend on the compiler's bitfield allocation
 * order - confirm for every supported target.
 */
struct gdma_hardware_completion_entry {
/* Client data; copied verbatim by gdma_poll_completion_queue(). */
char dma_client_data[GDMA_COMP_DATA_SIZE];
union {
/* The trailing word as a whole. */
uint32_t work_queue_owner_bits;
struct {
/* Work queue number. */
uint32_t wq_num : 24;
/* Copied into gdma_comp.send_work_queue by the poll function. */
uint32_t is_sq : 1;
uint32_t reserved : 4;
/* Compared against the expected pass counter when polling. */
uint32_t owner_bits : 3;
};
};
}; /* HW DATA */
/* Details of a WQE just written, reported by gdma_post_work_request(). */
struct gdma_posted_wqe_info {
	struct gdma_header gdma_header;

	/* Size of the written WQE in basic units (32B), filled by GDMA core.
	 * Advance the work queue by this amount once hardware has processed
	 * the WQE.
	 */
	uint32_t wqe_size_in_bu;

	/* Position in the work queue buffer at which the WQE was written, as
	 * of the time of writing. One unit stands for 32B of buffer space.
	 */
	uint32_t wqe_index;

	/* Queue offset of the WQE before masking to the queue size, in
	 * 32 byte units.
	 */
	uint32_t unmasked_queue_offset;
};
/* One scatter-gather list entry, copied as-is into the WQE. */
struct gdma_sgl_element {
	uint64_t address;	/* buffer address */
	uint32_t memory_key;	/* key for the memory holding the buffer */
	uint32_t size;		/* buffer length */
};
/* Upper bound on SGL entries carried by a transmit request. */
#define MAX_SGL_ENTRIES_FOR_TRANSMIT 30

/* Fixed-capacity SGL big enough for the largest transmit request. */
struct one_sgl {
	struct gdma_sgl_element gdma_sgl[MAX_SGL_ENTRIES_FOR_TRANSMIT];
};
/* Description of one work request, consumed by gdma_post_work_request(). */
struct gdma_work_request {
	struct gdma_header gdma_header;

	/* Scatter-gather list and its length in elements. */
	struct gdma_sgl_element *sgl;
	uint32_t num_sgl_elements;

	/* Optional inline OOB data (may be NULL) and its length. */
	uint32_t inline_oob_size_in_bytes;
	void *inline_oob_data;

	/* From _gdma_work_request_FLAGS */
	uint32_t flags;

	/* For LSO, this is the MTU of the data */
	uint32_t client_data_unit;
};
/* Values of the cqe_type field of struct mana_cqe_header. */
enum mana_cqe_type {
	CQE_INVALID = 0,
};
/*
 * 32-bit header of a MANA completion: 6-bit type, 2-bit client type and a
 * 24-bit vendor error code.
 * NOTE(review): bit positions depend on the compiler's bitfield allocation
 * order - confirm for every supported target.
 */
struct mana_cqe_header {
/* NOTE(review): presumably one of enum mana_cqe_type - confirm. */
uint32_t cqe_type : 6;
uint32_t client_type : 2;
uint32_t vendor_err : 24;
}; /* HW DATA */
/* NDIS HASH Types */
/* One bit per hash type, so the values can be OR-ed together. */
#define NDIS_HASH_IPV4 RTE_BIT32(0)
#define NDIS_HASH_TCP_IPV4 RTE_BIT32(1)
#define NDIS_HASH_UDP_IPV4 RTE_BIT32(2)
#define NDIS_HASH_IPV6 RTE_BIT32(3)
#define NDIS_HASH_TCP_IPV6 RTE_BIT32(4)
#define NDIS_HASH_UDP_IPV6 RTE_BIT32(5)
#define NDIS_HASH_IPV6_EX RTE_BIT32(6)
#define NDIS_HASH_TCP_IPV6_EX RTE_BIT32(7)
#define NDIS_HASH_UDP_IPV6_EX RTE_BIT32(8)
/* All hash types that cover only the IP layer. */
#define MANA_HASH_L3 (NDIS_HASH_IPV4 | NDIS_HASH_IPV6 | NDIS_HASH_IPV6_EX)
/* All hash types that also cover TCP or UDP. */
#define MANA_HASH_L4 \
(NDIS_HASH_TCP_IPV4 | NDIS_HASH_UDP_IPV4 | NDIS_HASH_TCP_IPV6 | \
NDIS_HASH_UDP_IPV6 | NDIS_HASH_TCP_IPV6_EX | NDIS_HASH_UDP_IPV6_EX)
/*
 * Header placed at the start of every WQE, made of two 32-bit words.
 * write_dma_client_oob() zeroes it and then sets num_sgl_entries,
 * inline_client_oob_size_in_dwords and client_data_unit; the other fields
 * stay zero in the code visible here.
 * NOTE(review): bit positions depend on the compiler's bitfield allocation
 * order - confirm for every supported target.
 */
struct gdma_wqe_dma_oob {
uint32_t reserved:24;
uint32_t last_v_bytes:8;
union {
/* The second word as a whole. */
uint32_t flags;
struct {
/* Set from the work request's num_sgl_elements. */
uint32_t num_sgl_entries:8;
/* Size reserved for the inline client OOB, in 32-bit words. */
uint32_t inline_client_oob_size_in_dwords:3;
uint32_t client_oob_in_sgl:1;
uint32_t consume_credit:1;
uint32_t fence:1;
uint32_t reserved1:2;
/* Set from the work request's 32-bit client_data_unit, so only
 * its low 14 bits are kept.
 */
uint32_t client_data_unit:14;
uint32_t check_sn:1;
uint32_t sgl_direct:1;
};
};
};
struct mana_mr_cache {
uint32_t lkey;
uintptr_t addr;
@ -103,6 +274,15 @@ struct mana_rxq_desc {
uint32_t wqe_size_in_bu;
};
/*
 * Software state of one GDMA queue.
 *
 * For work queues, gdma.c counts head in units of
 * GDMA_WQE_ALIGNMENT_UNIT_SIZE bytes; for completion queues it counts head
 * in entries.
 */
struct mana_gdma_queue {
	/* Start of the queue memory. */
	void *buffer;
	/* Queue capacity in entries. */
	uint32_t count;
	/* Queue capacity in bytes. */
	uint32_t size;
	/* Queue identifier. */
	uint32_t id;
	/* Free-running producer / consumer positions. */
	uint32_t head;
	uint32_t tail;
};
#define MANA_MR_BTREE_PER_QUEUE_N 64
struct mana_txq {
@ -152,12 +332,23 @@ extern int mana_logtype_init;
#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>")
/* Ring the doorbell of a send, receive or completion queue located in
 * db_page. Returns 0 on success, -1 for any other queue type.
 */
int mana_ring_doorbell(void *db_page, enum gdma_queue_types queue_type,
uint32_t queue_id, uint32_t tail);
/* Write one WQE for work_req at the queue head and advance the head.
 * wqe_info may be NULL. Returns 0 on success or a negative errno value
 * (-EBUSY when the queue lacks free space).
 */
int gdma_post_work_request(struct mana_gdma_queue *queue,
struct gdma_work_request *work_req,
struct gdma_posted_wqe_info *wqe_info);
/* Address in the queue buffer that corresponds to the current queue head. */
uint8_t *gdma_get_wqe_pointer(struct mana_gdma_queue *queue);
uint16_t mana_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts,
uint16_t pkts_n);
uint16_t mana_tx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts,
uint16_t pkts_n);
/* Fetch at most one completion from cq into comp. Returns 1 when a
 * completion was copied, 0 when none is pending, -1 when the entry's owner
 * bits are inconsistent (logged as CQ overflow).
 */
int gdma_poll_completion_queue(struct mana_gdma_queue *cq,
struct gdma_comp *comp);
struct mana_mr_cache *mana_find_pmd_mr(struct mana_mr_btree *local_tree,
struct mana_priv *priv,
struct rte_mbuf *mbuf);

View File

@ -10,6 +10,7 @@ endif
deps += ['pci', 'bus_pci', 'net', 'eal', 'kvargs']
sources += files(
'gdma.c',
'mana.c',
'mp.c',
'mr.c',