net/gve/base: introduce Google Virtual Ethernet base

The following base code is based on the Google Virtual Ethernet (gve)
driver v1.3.0, under the MIT license.
- gve_adminq.c
- gve_adminq.h
- gve_desc.h
- gve_desc_dqo.h
- gve_register.h
- gve.h

The original code is in:
https://github.com/GoogleCloudPlatform/compute-virtual-ethernet-linux/\
tree/v1.3.0/google/gve

Note that this code is not an Intel file set; it comes from the kernel
community. The base code there carries the statement
SPDX-License-Identifier: (GPL-2.0 OR MIT). Here we follow only the MIT
license, as an exception in DPDK.

Signed-off-by: Xiaoyun Li <xiaoyun.li@intel.com>
Signed-off-by: Haiyue Wang <haiyue.wang@intel.com>
Signed-off-by: Junfeng Guo <junfeng.guo@intel.com>
Author:    Junfeng Guo <junfeng.guo@intel.com>
Date:      2022-10-25 17:07:22 +08:00
Committer: Ferruh Yigit
Commit:    f86e5ed867 (parent: 9082336048)
6 changed files with 1771 additions and 0 deletions

gve.h (new file)
@@ -0,0 +1,56 @@
/* SPDX-License-Identifier: MIT
* Google Virtual Ethernet (gve) driver
* Copyright (C) 2015-2022 Google, Inc.
*/
#ifndef _GVE_H_
#define _GVE_H_
#include "gve_desc.h"
#define GVE_VERSION "1.3.0"
#define GVE_VERSION_PREFIX "GVE-"
#ifndef GOOGLE_VENDOR_ID
#define GOOGLE_VENDOR_ID 0x1ae0
#endif
#define GVE_DEV_ID 0x0042
#define GVE_REG_BAR 0
#define GVE_DB_BAR 2
/* 1 for management, 1 for rx, 1 for tx */
#define GVE_MIN_MSIX 3
/* PTYPEs are always 10 bits. */
#define GVE_NUM_PTYPES 1024
struct gve_irq_db {
rte_be32_t id;
} ____cacheline_aligned;
struct gve_ptype {
uint8_t l3_type; /* `gve_l3_type` in gve_adminq.h */
uint8_t l4_type; /* `gve_l4_type` in gve_adminq.h */
};
struct gve_ptype_lut {
struct gve_ptype ptypes[GVE_NUM_PTYPES];
};
enum gve_queue_format {
GVE_QUEUE_FORMAT_UNSPECIFIED = 0x0, /* default unspecified */
GVE_GQI_RDA_FORMAT = 0x1, /* GQI Raw Addressing */
GVE_GQI_QPL_FORMAT = 0x2, /* GQI Queue Page List */
GVE_DQO_RDA_FORMAT = 0x3, /* DQO Raw Addressing */
};
enum gve_state_flags_bit {
GVE_PRIV_FLAGS_ADMIN_QUEUE_OK = 1,
GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK = 2,
GVE_PRIV_FLAGS_DEVICE_RINGS_OK = 3,
GVE_PRIV_FLAGS_NAPI_ENABLED = 4,
};
#endif /* _GVE_H_ */
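
For context, a minimal sketch (not part of the imported file) of how the 1024-entry gve_ptype_lut above might be consumed once gve_adminq_get_ptype_map_dqo() has filled it in; the helper name is hypothetical:

/* Hypothetical helper: translate the 10-bit hardware packet type into the
 * l3/l4 enums from gve_adminq.h. GVE_NUM_PTYPES is a power of two, so the
 * mask keeps the index in range.
 */
static inline struct gve_ptype
gve_ptype_lookup(const struct gve_ptype_lut *lut, uint16_t hw_ptype)
{
	return lut->ptypes[hw_ptype & (GVE_NUM_PTYPES - 1)];
}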

gve_adminq.c (new file)
@@ -0,0 +1,920 @@
/* SPDX-License-Identifier: MIT
* Google Virtual Ethernet (gve) driver
* Copyright (C) 2015-2022 Google, Inc.
*/
#include "gve_adminq.h"
#include "gve_register.h"
#define GVE_MAX_ADMINQ_RELEASE_CHECK 500
#define GVE_ADMINQ_SLEEP_LEN 20
#define GVE_MAX_ADMINQ_EVENT_COUNTER_CHECK 100
#define GVE_DEVICE_OPTION_ERROR_FMT "%s option error:\n Expected: length=%d, feature_mask=%x.\n Actual: length=%d, feature_mask=%x."
#define GVE_DEVICE_OPTION_TOO_BIG_FMT "Length of %s option larger than expected. Possible older version of guest driver."
static
struct gve_device_option *gve_get_next_option(struct gve_device_descriptor *descriptor,
struct gve_device_option *option)
{
uintptr_t option_end, descriptor_end;
option_end = (uintptr_t)option + sizeof(*option) + be16_to_cpu(option->option_length);
descriptor_end = (uintptr_t)descriptor + be16_to_cpu(descriptor->total_length);
return option_end > descriptor_end ? NULL : (struct gve_device_option *)option_end;
}
static
void gve_parse_device_option(struct gve_priv *priv,
struct gve_device_option *option,
struct gve_device_option_gqi_rda **dev_op_gqi_rda,
struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
struct gve_device_option_dqo_rda **dev_op_dqo_rda,
struct gve_device_option_jumbo_frames **dev_op_jumbo_frames)
{
u32 req_feat_mask = be32_to_cpu(option->required_features_mask);
u16 option_length = be16_to_cpu(option->option_length);
u16 option_id = be16_to_cpu(option->option_id);
/* If the length or feature mask doesn't match, continue without
* enabling the feature.
*/
switch (option_id) {
case GVE_DEV_OPT_ID_GQI_RAW_ADDRESSING:
if (option_length != GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING ||
req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RAW_ADDRESSING) {
PMD_DRV_LOG(WARNING, GVE_DEVICE_OPTION_ERROR_FMT,
"Raw Addressing",
GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING,
GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RAW_ADDRESSING,
option_length, req_feat_mask);
break;
}
PMD_DRV_LOG(INFO, "Gqi raw addressing device option enabled.");
priv->queue_format = GVE_GQI_RDA_FORMAT;
break;
case GVE_DEV_OPT_ID_GQI_RDA:
if (option_length < sizeof(**dev_op_gqi_rda) ||
req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA) {
PMD_DRV_LOG(WARNING, GVE_DEVICE_OPTION_ERROR_FMT,
"GQI RDA", (int)sizeof(**dev_op_gqi_rda),
GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA,
option_length, req_feat_mask);
break;
}
if (option_length > sizeof(**dev_op_gqi_rda)) {
PMD_DRV_LOG(WARNING,
GVE_DEVICE_OPTION_TOO_BIG_FMT, "GQI RDA");
}
*dev_op_gqi_rda = RTE_PTR_ADD(option, sizeof(*option));
break;
case GVE_DEV_OPT_ID_GQI_QPL:
if (option_length < sizeof(**dev_op_gqi_qpl) ||
req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL) {
PMD_DRV_LOG(WARNING, GVE_DEVICE_OPTION_ERROR_FMT,
"GQI QPL", (int)sizeof(**dev_op_gqi_qpl),
GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL,
option_length, req_feat_mask);
break;
}
if (option_length > sizeof(**dev_op_gqi_qpl)) {
PMD_DRV_LOG(WARNING,
GVE_DEVICE_OPTION_TOO_BIG_FMT, "GQI QPL");
}
*dev_op_gqi_qpl = RTE_PTR_ADD(option, sizeof(*option));
break;
case GVE_DEV_OPT_ID_DQO_RDA:
if (option_length < sizeof(**dev_op_dqo_rda) ||
req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA) {
PMD_DRV_LOG(WARNING, GVE_DEVICE_OPTION_ERROR_FMT,
"DQO RDA", (int)sizeof(**dev_op_dqo_rda),
GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA,
option_length, req_feat_mask);
break;
}
if (option_length > sizeof(**dev_op_dqo_rda)) {
PMD_DRV_LOG(WARNING,
GVE_DEVICE_OPTION_TOO_BIG_FMT, "DQO RDA");
}
*dev_op_dqo_rda = RTE_PTR_ADD(option, sizeof(*option));
break;
case GVE_DEV_OPT_ID_JUMBO_FRAMES:
if (option_length < sizeof(**dev_op_jumbo_frames) ||
req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES) {
PMD_DRV_LOG(WARNING, GVE_DEVICE_OPTION_ERROR_FMT,
"Jumbo Frames",
(int)sizeof(**dev_op_jumbo_frames),
GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES,
option_length, req_feat_mask);
break;
}
if (option_length > sizeof(**dev_op_jumbo_frames)) {
PMD_DRV_LOG(WARNING,
GVE_DEVICE_OPTION_TOO_BIG_FMT,
"Jumbo Frames");
}
*dev_op_jumbo_frames = RTE_PTR_ADD(option, sizeof(*option));
break;
default:
/* If we don't recognize the option just continue
* without doing anything.
*/
PMD_DRV_LOG(DEBUG, "Unrecognized device option 0x%hx not enabled.",
option_id);
}
}
/* Process all device options for a given describe device call. */
static int
gve_process_device_options(struct gve_priv *priv,
struct gve_device_descriptor *descriptor,
struct gve_device_option_gqi_rda **dev_op_gqi_rda,
struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
struct gve_device_option_dqo_rda **dev_op_dqo_rda,
struct gve_device_option_jumbo_frames **dev_op_jumbo_frames)
{
const int num_options = be16_to_cpu(descriptor->num_device_options);
struct gve_device_option *dev_opt;
int i;
/* The options struct directly follows the device descriptor. */
dev_opt = RTE_PTR_ADD(descriptor, sizeof(*descriptor));
for (i = 0; i < num_options; i++) {
struct gve_device_option *next_opt;
next_opt = gve_get_next_option(descriptor, dev_opt);
if (!next_opt) {
PMD_DRV_LOG(ERR,
"options exceed device_descriptor's total length.");
return -EINVAL;
}
gve_parse_device_option(priv, dev_opt,
dev_op_gqi_rda, dev_op_gqi_qpl,
dev_op_dqo_rda, dev_op_jumbo_frames);
dev_opt = next_opt;
}
return 0;
}
int gve_adminq_alloc(struct gve_priv *priv)
{
priv->adminq = gve_alloc_dma_mem(&priv->adminq_dma_mem, PAGE_SIZE);
if (unlikely(!priv->adminq))
return -ENOMEM;
priv->adminq_mask = (PAGE_SIZE / sizeof(union gve_adminq_command)) - 1;
priv->adminq_prod_cnt = 0;
priv->adminq_cmd_fail = 0;
priv->adminq_timeouts = 0;
priv->adminq_describe_device_cnt = 0;
priv->adminq_cfg_device_resources_cnt = 0;
priv->adminq_register_page_list_cnt = 0;
priv->adminq_unregister_page_list_cnt = 0;
priv->adminq_create_tx_queue_cnt = 0;
priv->adminq_create_rx_queue_cnt = 0;
priv->adminq_destroy_tx_queue_cnt = 0;
priv->adminq_destroy_rx_queue_cnt = 0;
priv->adminq_dcfg_device_resources_cnt = 0;
priv->adminq_set_driver_parameter_cnt = 0;
priv->adminq_report_stats_cnt = 0;
priv->adminq_report_link_speed_cnt = 0;
priv->adminq_get_ptype_map_cnt = 0;
/* Setup Admin queue with the device */
iowrite32be(priv->adminq_dma_mem.pa / PAGE_SIZE,
&priv->reg_bar0->adminq_pfn);
gve_set_admin_queue_ok(priv);
return 0;
}
void gve_adminq_release(struct gve_priv *priv)
{
int i = 0;
/* Tell the device the adminq is leaving */
iowrite32be(0x0, &priv->reg_bar0->adminq_pfn);
while (ioread32be(&priv->reg_bar0->adminq_pfn)) {
/* If this is reached the device is unrecoverable and still
* holding memory. Continue looping to avoid memory corruption,
* but WARN so it is visible what is going on.
*/
if (i == GVE_MAX_ADMINQ_RELEASE_CHECK)
PMD_DRV_LOG(WARNING, "Unrecoverable platform error!");
i++;
msleep(GVE_ADMINQ_SLEEP_LEN);
}
gve_clear_device_rings_ok(priv);
gve_clear_device_resources_ok(priv);
gve_clear_admin_queue_ok(priv);
}
void gve_adminq_free(struct gve_priv *priv)
{
if (!gve_get_admin_queue_ok(priv))
return;
gve_adminq_release(priv);
gve_free_dma_mem(&priv->adminq_dma_mem);
gve_clear_admin_queue_ok(priv);
}
static void gve_adminq_kick_cmd(struct gve_priv *priv, u32 prod_cnt)
{
iowrite32be(prod_cnt, &priv->reg_bar0->adminq_doorbell);
}
static bool gve_adminq_wait_for_cmd(struct gve_priv *priv, u32 prod_cnt)
{
int i;
for (i = 0; i < GVE_MAX_ADMINQ_EVENT_COUNTER_CHECK; i++) {
if (ioread32be(&priv->reg_bar0->adminq_event_counter)
== prod_cnt)
return true;
msleep(GVE_ADMINQ_SLEEP_LEN);
}
return false;
}
static int gve_adminq_parse_err(struct gve_priv *priv, u32 status)
{
if (status != GVE_ADMINQ_COMMAND_PASSED &&
status != GVE_ADMINQ_COMMAND_UNSET) {
PMD_DRV_LOG(ERR, "AQ command failed with status %d", status);
priv->adminq_cmd_fail++;
}
switch (status) {
case GVE_ADMINQ_COMMAND_PASSED:
return 0;
case GVE_ADMINQ_COMMAND_UNSET:
PMD_DRV_LOG(ERR, "parse_aq_err: err and status both unset, this should not be possible.");
return -EINVAL;
case GVE_ADMINQ_COMMAND_ERROR_ABORTED:
case GVE_ADMINQ_COMMAND_ERROR_CANCELLED:
case GVE_ADMINQ_COMMAND_ERROR_DATALOSS:
case GVE_ADMINQ_COMMAND_ERROR_FAILED_PRECONDITION:
case GVE_ADMINQ_COMMAND_ERROR_UNAVAILABLE:
return -EAGAIN;
case GVE_ADMINQ_COMMAND_ERROR_ALREADY_EXISTS:
case GVE_ADMINQ_COMMAND_ERROR_INTERNAL_ERROR:
case GVE_ADMINQ_COMMAND_ERROR_INVALID_ARGUMENT:
case GVE_ADMINQ_COMMAND_ERROR_NOT_FOUND:
case GVE_ADMINQ_COMMAND_ERROR_OUT_OF_RANGE:
case GVE_ADMINQ_COMMAND_ERROR_UNKNOWN_ERROR:
return -EINVAL;
case GVE_ADMINQ_COMMAND_ERROR_DEADLINE_EXCEEDED:
return -ETIMEDOUT;
case GVE_ADMINQ_COMMAND_ERROR_PERMISSION_DENIED:
case GVE_ADMINQ_COMMAND_ERROR_UNAUTHENTICATED:
return -EACCES;
case GVE_ADMINQ_COMMAND_ERROR_RESOURCE_EXHAUSTED:
return -ENOMEM;
case GVE_ADMINQ_COMMAND_ERROR_UNIMPLEMENTED:
return -ENOTSUP;
default:
PMD_DRV_LOG(ERR, "parse_aq_err: unknown status code %d",
status);
return -EINVAL;
}
}
/* Flushes all AQ commands currently queued and waits for them to complete.
* If there are failures, it will return the first error.
*/
static int gve_adminq_kick_and_wait(struct gve_priv *priv)
{
u32 tail, head;
u32 i;
tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
head = priv->adminq_prod_cnt;
gve_adminq_kick_cmd(priv, head);
if (!gve_adminq_wait_for_cmd(priv, head)) {
PMD_DRV_LOG(ERR, "AQ commands timed out, need to reset AQ");
priv->adminq_timeouts++;
return -ENOTRECOVERABLE;
}
for (i = tail; i < head; i++) {
union gve_adminq_command *cmd;
u32 status, err;
cmd = &priv->adminq[i & priv->adminq_mask];
status = be32_to_cpu(READ_ONCE32(cmd->status));
err = gve_adminq_parse_err(priv, status);
if (err)
/* Return the first error if we failed. */
return err;
}
return 0;
}
/* This function is not threadsafe - the caller is responsible for any
* necessary locks.
*/
static int gve_adminq_issue_cmd(struct gve_priv *priv,
union gve_adminq_command *cmd_orig)
{
union gve_adminq_command *cmd;
u32 opcode;
u32 tail;
tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
/* Check if next command will overflow the buffer. */
if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) ==
(tail & priv->adminq_mask)) {
int err;
/* Flush existing commands to make room. */
err = gve_adminq_kick_and_wait(priv);
if (err)
return err;
/* Retry. */
tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) ==
(tail & priv->adminq_mask)) {
/* This should never happen. We just flushed the
* command queue so there should be enough space.
*/
return -ENOMEM;
}
}
cmd = &priv->adminq[priv->adminq_prod_cnt & priv->adminq_mask];
priv->adminq_prod_cnt++;
memcpy(cmd, cmd_orig, sizeof(*cmd_orig));
opcode = be32_to_cpu(READ_ONCE32(cmd->opcode));
switch (opcode) {
case GVE_ADMINQ_DESCRIBE_DEVICE:
priv->adminq_describe_device_cnt++;
break;
case GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES:
priv->adminq_cfg_device_resources_cnt++;
break;
case GVE_ADMINQ_REGISTER_PAGE_LIST:
priv->adminq_register_page_list_cnt++;
break;
case GVE_ADMINQ_UNREGISTER_PAGE_LIST:
priv->adminq_unregister_page_list_cnt++;
break;
case GVE_ADMINQ_CREATE_TX_QUEUE:
priv->adminq_create_tx_queue_cnt++;
break;
case GVE_ADMINQ_CREATE_RX_QUEUE:
priv->adminq_create_rx_queue_cnt++;
break;
case GVE_ADMINQ_DESTROY_TX_QUEUE:
priv->adminq_destroy_tx_queue_cnt++;
break;
case GVE_ADMINQ_DESTROY_RX_QUEUE:
priv->adminq_destroy_rx_queue_cnt++;
break;
case GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES:
priv->adminq_dcfg_device_resources_cnt++;
break;
case GVE_ADMINQ_SET_DRIVER_PARAMETER:
priv->adminq_set_driver_parameter_cnt++;
break;
case GVE_ADMINQ_REPORT_STATS:
priv->adminq_report_stats_cnt++;
break;
case GVE_ADMINQ_REPORT_LINK_SPEED:
priv->adminq_report_link_speed_cnt++;
break;
case GVE_ADMINQ_GET_PTYPE_MAP:
priv->adminq_get_ptype_map_cnt++;
break;
default:
PMD_DRV_LOG(ERR, "unknown AQ command opcode %d", opcode);
}
return 0;
}
/* This function is not threadsafe - the caller is responsible for any
* necessary locks.
* The caller is also responsible for making sure there are no commands
* waiting to be executed.
*/
static int gve_adminq_execute_cmd(struct gve_priv *priv,
union gve_adminq_command *cmd_orig)
{
u32 tail, head;
int err;
tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
head = priv->adminq_prod_cnt;
if (tail != head)
/* This is not a valid path */
return -EINVAL;
err = gve_adminq_issue_cmd(priv, cmd_orig);
if (err)
return err;
return gve_adminq_kick_and_wait(priv);
}
/* The device specifies that the management vector can either be the first irq
 * or the last irq. ntfy_blk_msix_base_idx indicates the first irq assigned to
 * the ntfy blks. If it is 0 then the management vector is last, if it is 1 then
 * the management vector is first.
 *
 * gve arranges the msix vectors so that the management vector is last.
 */
#define GVE_NTFY_BLK_BASE_MSIX_IDX 0
int gve_adminq_configure_device_resources(struct gve_priv *priv,
dma_addr_t counter_array_bus_addr,
u32 num_counters,
dma_addr_t db_array_bus_addr,
u32 num_ntfy_blks)
{
union gve_adminq_command cmd;
memset(&cmd, 0, sizeof(cmd));
cmd.opcode = cpu_to_be32(GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES);
cmd.configure_device_resources =
(struct gve_adminq_configure_device_resources) {
.counter_array = cpu_to_be64(counter_array_bus_addr),
.num_counters = cpu_to_be32(num_counters),
.irq_db_addr = cpu_to_be64(db_array_bus_addr),
.num_irq_dbs = cpu_to_be32(num_ntfy_blks),
.irq_db_stride = cpu_to_be32(sizeof(*priv->irq_dbs)),
.ntfy_blk_msix_base_idx =
cpu_to_be32(GVE_NTFY_BLK_BASE_MSIX_IDX),
.queue_format = priv->queue_format,
};
return gve_adminq_execute_cmd(priv, &cmd);
}
int gve_adminq_deconfigure_device_resources(struct gve_priv *priv)
{
union gve_adminq_command cmd;
memset(&cmd, 0, sizeof(cmd));
cmd.opcode = cpu_to_be32(GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES);
return gve_adminq_execute_cmd(priv, &cmd);
}
static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)
{
struct gve_tx_queue *txq = priv->txqs[queue_index];
union gve_adminq_command cmd;
memset(&cmd, 0, sizeof(cmd));
cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_TX_QUEUE);
cmd.create_tx_queue = (struct gve_adminq_create_tx_queue) {
.queue_id = cpu_to_be32(queue_index),
.queue_resources_addr =
cpu_to_be64(txq->qres_mz->iova),
.tx_ring_addr = cpu_to_be64(txq->tx_ring_phys_addr),
.ntfy_id = cpu_to_be32(txq->ntfy_id),
};
if (gve_is_gqi(priv)) {
u32 qpl_id = priv->queue_format == GVE_GQI_RDA_FORMAT ?
GVE_RAW_ADDRESSING_QPL_ID : txq->qpl->id;
cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
} else {
cmd.create_tx_queue.tx_ring_size =
cpu_to_be16(txq->nb_tx_desc);
cmd.create_tx_queue.tx_comp_ring_addr =
cpu_to_be64(txq->complq->tx_ring_phys_addr);
cmd.create_tx_queue.tx_comp_ring_size =
cpu_to_be16(priv->tx_compq_size);
}
return gve_adminq_issue_cmd(priv, &cmd);
}
int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 num_queues)
{
int err;
u32 i;
for (i = 0; i < num_queues; i++) {
err = gve_adminq_create_tx_queue(priv, i);
if (err)
return err;
}
return gve_adminq_kick_and_wait(priv);
}
static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
{
struct gve_rx_queue *rxq = priv->rxqs[queue_index];
union gve_adminq_command cmd;
memset(&cmd, 0, sizeof(cmd));
cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE);
cmd.create_rx_queue = (struct gve_adminq_create_rx_queue) {
.queue_id = cpu_to_be32(queue_index),
.ntfy_id = cpu_to_be32(rxq->ntfy_id),
.queue_resources_addr = cpu_to_be64(rxq->qres_mz->iova),
};
if (gve_is_gqi(priv)) {
u32 qpl_id = priv->queue_format == GVE_GQI_RDA_FORMAT ?
GVE_RAW_ADDRESSING_QPL_ID : rxq->qpl->id;
cmd.create_rx_queue.rx_desc_ring_addr =
cpu_to_be64(rxq->mz->iova);
cmd.create_rx_queue.rx_data_ring_addr =
cpu_to_be64(rxq->data_mz->iova);
cmd.create_rx_queue.index = cpu_to_be32(queue_index);
cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
cmd.create_rx_queue.packet_buffer_size = cpu_to_be16(rxq->rx_buf_len);
} else {
cmd.create_rx_queue.rx_ring_size =
cpu_to_be16(priv->rx_desc_cnt);
cmd.create_rx_queue.rx_desc_ring_addr =
cpu_to_be64(rxq->rx_ring_phys_addr);
cmd.create_rx_queue.rx_data_ring_addr =
cpu_to_be64(rxq->bufq->rx_ring_phys_addr);
cmd.create_rx_queue.packet_buffer_size =
cpu_to_be16(rxq->rx_buf_len);
cmd.create_rx_queue.rx_buff_ring_size =
cpu_to_be16(priv->rx_bufq_size);
cmd.create_rx_queue.enable_rsc = !!(priv->enable_rsc);
}
return gve_adminq_issue_cmd(priv, &cmd);
}
int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues)
{
int err;
u32 i;
for (i = 0; i < num_queues; i++) {
err = gve_adminq_create_rx_queue(priv, i);
if (err)
return err;
}
return gve_adminq_kick_and_wait(priv);
}
static int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_index)
{
union gve_adminq_command cmd;
int err;
memset(&cmd, 0, sizeof(cmd));
cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_TX_QUEUE);
cmd.destroy_tx_queue = (struct gve_adminq_destroy_tx_queue) {
.queue_id = cpu_to_be32(queue_index),
};
err = gve_adminq_issue_cmd(priv, &cmd);
if (err)
return err;
return 0;
}
int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 num_queues)
{
int err;
u32 i;
for (i = 0; i < num_queues; i++) {
err = gve_adminq_destroy_tx_queue(priv, i);
if (err)
return err;
}
return gve_adminq_kick_and_wait(priv);
}
static int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_index)
{
union gve_adminq_command cmd;
int err;
memset(&cmd, 0, sizeof(cmd));
cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_RX_QUEUE);
cmd.destroy_rx_queue = (struct gve_adminq_destroy_rx_queue) {
.queue_id = cpu_to_be32(queue_index),
};
err = gve_adminq_issue_cmd(priv, &cmd);
if (err)
return err;
return 0;
}
int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues)
{
int err;
u32 i;
for (i = 0; i < num_queues; i++) {
err = gve_adminq_destroy_rx_queue(priv, i);
if (err)
return err;
}
return gve_adminq_kick_and_wait(priv);
}
static int gve_set_desc_cnt(struct gve_priv *priv,
struct gve_device_descriptor *descriptor)
{
priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries);
if (priv->tx_desc_cnt * sizeof(priv->txqs[0]->tx_desc_ring[0])
< PAGE_SIZE) {
PMD_DRV_LOG(ERR, "Tx desc count %d too low", priv->tx_desc_cnt);
return -EINVAL;
}
priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);
if (priv->rx_desc_cnt * sizeof(priv->rxqs[0]->rx_desc_ring[0])
< PAGE_SIZE) {
PMD_DRV_LOG(ERR, "Rx desc count %d too low", priv->rx_desc_cnt);
return -EINVAL;
}
return 0;
}
static int
gve_set_desc_cnt_dqo(struct gve_priv *priv,
const struct gve_device_descriptor *descriptor,
const struct gve_device_option_dqo_rda *dev_op_dqo_rda)
{
priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries);
priv->tx_compq_size = be16_to_cpu(dev_op_dqo_rda->tx_comp_ring_entries);
priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);
priv->rx_bufq_size = be16_to_cpu(dev_op_dqo_rda->rx_buff_ring_entries);
return 0;
}
static void gve_enable_supported_features(struct gve_priv *priv,
u32 supported_features_mask,
const struct gve_device_option_jumbo_frames
*dev_op_jumbo_frames)
{
/* Before control reaches this point, the page-size-capped max MTU from
* the gve_device_descriptor field has already been stored in
* priv->dev->max_mtu. We overwrite it with the true max MTU below.
*/
if (dev_op_jumbo_frames &&
(supported_features_mask & GVE_SUP_JUMBO_FRAMES_MASK)) {
PMD_DRV_LOG(INFO, "JUMBO FRAMES device option enabled.");
priv->max_mtu = be16_to_cpu(dev_op_jumbo_frames->max_mtu);
}
}
int gve_adminq_describe_device(struct gve_priv *priv)
{
struct gve_device_option_jumbo_frames *dev_op_jumbo_frames = NULL;
struct gve_device_option_gqi_rda *dev_op_gqi_rda = NULL;
struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL;
struct gve_device_option_dqo_rda *dev_op_dqo_rda = NULL;
struct gve_device_descriptor *descriptor;
struct gve_dma_mem descriptor_dma_mem;
u32 supported_features_mask = 0;
union gve_adminq_command cmd;
int err = 0;
u16 mtu;
memset(&cmd, 0, sizeof(cmd));
descriptor = gve_alloc_dma_mem(&descriptor_dma_mem, PAGE_SIZE);
if (!descriptor)
return -ENOMEM;
cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESCRIBE_DEVICE);
cmd.describe_device.device_descriptor_addr =
cpu_to_be64(descriptor_dma_mem.pa);
cmd.describe_device.device_descriptor_version =
cpu_to_be32(GVE_ADMINQ_DEVICE_DESCRIPTOR_VERSION);
cmd.describe_device.available_length = cpu_to_be32(PAGE_SIZE);
err = gve_adminq_execute_cmd(priv, &cmd);
if (err)
goto free_device_descriptor;
err = gve_process_device_options(priv, descriptor, &dev_op_gqi_rda,
&dev_op_gqi_qpl, &dev_op_dqo_rda,
&dev_op_jumbo_frames);
if (err)
goto free_device_descriptor;
/* If the GQI_RAW_ADDRESSING option is not enabled and the queue format
* is not set to GqiRda, choose the queue format in a priority order:
* DqoRda, GqiRda, GqiQpl. Use GqiQpl as default.
*/
if (dev_op_dqo_rda) {
priv->queue_format = GVE_DQO_RDA_FORMAT;
PMD_DRV_LOG(INFO, "Driver is running with DQO RDA queue format.");
supported_features_mask =
be32_to_cpu(dev_op_dqo_rda->supported_features_mask);
} else if (dev_op_gqi_rda) {
priv->queue_format = GVE_GQI_RDA_FORMAT;
PMD_DRV_LOG(INFO, "Driver is running with GQI RDA queue format.");
supported_features_mask =
be32_to_cpu(dev_op_gqi_rda->supported_features_mask);
} else if (priv->queue_format == GVE_GQI_RDA_FORMAT) {
PMD_DRV_LOG(INFO, "Driver is running with GQI RDA queue format.");
} else {
priv->queue_format = GVE_GQI_QPL_FORMAT;
if (dev_op_gqi_qpl)
supported_features_mask =
be32_to_cpu(dev_op_gqi_qpl->supported_features_mask);
PMD_DRV_LOG(INFO, "Driver is running with GQI QPL queue format.");
}
if (gve_is_gqi(priv)) {
err = gve_set_desc_cnt(priv, descriptor);
} else {
/* DQO supports LRO. */
err = gve_set_desc_cnt_dqo(priv, descriptor, dev_op_dqo_rda);
}
if (err)
goto free_device_descriptor;
priv->max_registered_pages =
be64_to_cpu(descriptor->max_registered_pages);
mtu = be16_to_cpu(descriptor->mtu);
if (mtu < ETH_MIN_MTU) {
PMD_DRV_LOG(ERR, "MTU %d below minimum MTU", mtu);
err = -EINVAL;
goto free_device_descriptor;
}
priv->max_mtu = mtu;
priv->num_event_counters = be16_to_cpu(descriptor->counters);
rte_memcpy(priv->dev_addr.addr_bytes, descriptor->mac, ETH_ALEN);
PMD_DRV_LOG(INFO, "MAC addr: " RTE_ETHER_ADDR_PRT_FMT,
RTE_ETHER_ADDR_BYTES(&priv->dev_addr));
priv->tx_pages_per_qpl = be16_to_cpu(descriptor->tx_pages_per_qpl);
priv->rx_data_slot_cnt = be16_to_cpu(descriptor->rx_pages_per_qpl);
if (gve_is_gqi(priv) && priv->rx_data_slot_cnt < priv->rx_desc_cnt) {
PMD_DRV_LOG(ERR,
"rx_data_slot_cnt cannot be smaller than rx_desc_cnt, setting rx_desc_cnt down to %d",
priv->rx_data_slot_cnt);
priv->rx_desc_cnt = priv->rx_data_slot_cnt;
}
priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues);
gve_enable_supported_features(priv, supported_features_mask,
dev_op_jumbo_frames);
free_device_descriptor:
gve_free_dma_mem(&descriptor_dma_mem);
return err;
}
int gve_adminq_register_page_list(struct gve_priv *priv,
struct gve_queue_page_list *qpl)
{
struct gve_dma_mem page_list_dma_mem;
u32 num_entries = qpl->num_entries;
u32 size = num_entries * sizeof(qpl->page_buses[0]);
union gve_adminq_command cmd;
__be64 *page_list;
int err;
u32 i;
memset(&cmd, 0, sizeof(cmd));
page_list = gve_alloc_dma_mem(&page_list_dma_mem, size);
if (!page_list)
return -ENOMEM;
for (i = 0; i < num_entries; i++)
page_list[i] = cpu_to_be64(qpl->page_buses[i]);
cmd.opcode = cpu_to_be32(GVE_ADMINQ_REGISTER_PAGE_LIST);
cmd.reg_page_list = (struct gve_adminq_register_page_list) {
.page_list_id = cpu_to_be32(qpl->id),
.num_pages = cpu_to_be32(num_entries),
.page_address_list_addr = cpu_to_be64(page_list_dma_mem.pa),
};
err = gve_adminq_execute_cmd(priv, &cmd);
gve_free_dma_mem(&page_list_dma_mem);
return err;
}
int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id)
{
union gve_adminq_command cmd;
memset(&cmd, 0, sizeof(cmd));
cmd.opcode = cpu_to_be32(GVE_ADMINQ_UNREGISTER_PAGE_LIST);
cmd.unreg_page_list = (struct gve_adminq_unregister_page_list) {
.page_list_id = cpu_to_be32(page_list_id),
};
return gve_adminq_execute_cmd(priv, &cmd);
}
int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu)
{
union gve_adminq_command cmd;
memset(&cmd, 0, sizeof(cmd));
cmd.opcode = cpu_to_be32(GVE_ADMINQ_SET_DRIVER_PARAMETER);
cmd.set_driver_param = (struct gve_adminq_set_driver_parameter) {
.parameter_type = cpu_to_be32(GVE_SET_PARAM_MTU),
.parameter_value = cpu_to_be64(mtu),
};
return gve_adminq_execute_cmd(priv, &cmd);
}
int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len,
dma_addr_t stats_report_addr, u64 interval)
{
union gve_adminq_command cmd;
memset(&cmd, 0, sizeof(cmd));
cmd.opcode = cpu_to_be32(GVE_ADMINQ_REPORT_STATS);
cmd.report_stats = (struct gve_adminq_report_stats) {
.stats_report_len = cpu_to_be64(stats_report_len),
.stats_report_addr = cpu_to_be64(stats_report_addr),
.interval = cpu_to_be64(interval),
};
return gve_adminq_execute_cmd(priv, &cmd);
}
int gve_adminq_report_link_speed(struct gve_priv *priv)
{
struct gve_dma_mem link_speed_region_dma_mem;
union gve_adminq_command gvnic_cmd;
u64 *link_speed_region;
int err;
link_speed_region = gve_alloc_dma_mem(&link_speed_region_dma_mem,
sizeof(*link_speed_region));
if (!link_speed_region)
return -ENOMEM;
memset(&gvnic_cmd, 0, sizeof(gvnic_cmd));
gvnic_cmd.opcode = cpu_to_be32(GVE_ADMINQ_REPORT_LINK_SPEED);
gvnic_cmd.report_link_speed.link_speed_address =
cpu_to_be64(link_speed_region_dma_mem.pa);
err = gve_adminq_execute_cmd(priv, &gvnic_cmd);
priv->link_speed = be64_to_cpu(*link_speed_region);
gve_free_dma_mem(&link_speed_region_dma_mem);
return err;
}
int gve_adminq_get_ptype_map_dqo(struct gve_priv *priv,
struct gve_ptype_lut *ptype_lut)
{
struct gve_dma_mem ptype_map_dma_mem;
struct gve_ptype_map *ptype_map;
union gve_adminq_command cmd;
int err = 0;
int i;
memset(&cmd, 0, sizeof(cmd));
ptype_map = gve_alloc_dma_mem(&ptype_map_dma_mem, sizeof(*ptype_map));
if (!ptype_map)
return -ENOMEM;
cmd.opcode = cpu_to_be32(GVE_ADMINQ_GET_PTYPE_MAP);
cmd.get_ptype_map = (struct gve_adminq_get_ptype_map) {
.ptype_map_len = cpu_to_be64(sizeof(*ptype_map)),
.ptype_map_addr = cpu_to_be64(ptype_map_dma_mem.pa),
};
err = gve_adminq_execute_cmd(priv, &cmd);
if (err)
goto err;
/* Populate ptype_lut. */
for (i = 0; i < GVE_NUM_PTYPES; i++) {
ptype_lut->ptypes[i].l3_type =
ptype_map->ptypes[i].l3_type;
ptype_lut->ptypes[i].l4_type =
ptype_map->ptypes[i].l4_type;
}
err:
gve_free_dma_mem(&ptype_map_dma_mem);
return err;
}
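
As a usage illustration of the exported adminq API, here is a minimal bring-up sketch (not part of the imported code): the gve_example_* name, the DMA addresses and the counts are placeholders, and the per-queue structures in priv->txqs/priv->rxqs are assumed to have been set up by the PMD beforehand.

/* Hypothetical bring-up sequence: allocate the AQ page, describe the device,
 * hand it the counter/doorbell arrays, then create the default queues.
 */
static int gve_example_bringup(struct gve_priv *priv,
			       dma_addr_t counters_pa, u32 num_counters,
			       dma_addr_t irq_dbs_pa, u32 num_ntfy_blks)
{
	int err;

	err = gve_adminq_alloc(priv);           /* maps the AQ page, writes adminq_pfn */
	if (err)
		return err;
	err = gve_adminq_describe_device(priv); /* queue format, MTU, MAC, ring sizes */
	if (err)
		goto out;
	err = gve_adminq_configure_device_resources(priv, counters_pa, num_counters,
						    irq_dbs_pa, num_ntfy_blks);
	if (err)
		goto out;
	err = gve_adminq_create_tx_queues(priv, priv->default_num_queues);
	if (!err)
		err = gve_adminq_create_rx_queues(priv, priv->default_num_queues);
out:
	if (err)
		gve_adminq_free(priv);
	return err;
}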

gve_adminq.h (new file)
@@ -0,0 +1,379 @@
/* SPDX-License-Identifier: MIT
* Google Virtual Ethernet (gve) driver
* Copyright (C) 2015-2022 Google, Inc.
*/
#ifndef _GVE_ADMINQ_H
#define _GVE_ADMINQ_H
/* Admin queue opcodes */
enum gve_adminq_opcodes {
GVE_ADMINQ_DESCRIBE_DEVICE = 0x1,
GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES = 0x2,
GVE_ADMINQ_REGISTER_PAGE_LIST = 0x3,
GVE_ADMINQ_UNREGISTER_PAGE_LIST = 0x4,
GVE_ADMINQ_CREATE_TX_QUEUE = 0x5,
GVE_ADMINQ_CREATE_RX_QUEUE = 0x6,
GVE_ADMINQ_DESTROY_TX_QUEUE = 0x7,
GVE_ADMINQ_DESTROY_RX_QUEUE = 0x8,
GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES = 0x9,
GVE_ADMINQ_SET_DRIVER_PARAMETER = 0xB,
GVE_ADMINQ_REPORT_STATS = 0xC,
GVE_ADMINQ_REPORT_LINK_SPEED = 0xD,
GVE_ADMINQ_GET_PTYPE_MAP = 0xE,
};
/* Admin queue status codes */
enum gve_adminq_statuses {
GVE_ADMINQ_COMMAND_UNSET = 0x0,
GVE_ADMINQ_COMMAND_PASSED = 0x1,
GVE_ADMINQ_COMMAND_ERROR_ABORTED = 0xFFFFFFF0,
GVE_ADMINQ_COMMAND_ERROR_ALREADY_EXISTS = 0xFFFFFFF1,
GVE_ADMINQ_COMMAND_ERROR_CANCELLED = 0xFFFFFFF2,
GVE_ADMINQ_COMMAND_ERROR_DATALOSS = 0xFFFFFFF3,
GVE_ADMINQ_COMMAND_ERROR_DEADLINE_EXCEEDED = 0xFFFFFFF4,
GVE_ADMINQ_COMMAND_ERROR_FAILED_PRECONDITION = 0xFFFFFFF5,
GVE_ADMINQ_COMMAND_ERROR_INTERNAL_ERROR = 0xFFFFFFF6,
GVE_ADMINQ_COMMAND_ERROR_INVALID_ARGUMENT = 0xFFFFFFF7,
GVE_ADMINQ_COMMAND_ERROR_NOT_FOUND = 0xFFFFFFF8,
GVE_ADMINQ_COMMAND_ERROR_OUT_OF_RANGE = 0xFFFFFFF9,
GVE_ADMINQ_COMMAND_ERROR_PERMISSION_DENIED = 0xFFFFFFFA,
GVE_ADMINQ_COMMAND_ERROR_UNAUTHENTICATED = 0xFFFFFFFB,
GVE_ADMINQ_COMMAND_ERROR_RESOURCE_EXHAUSTED = 0xFFFFFFFC,
GVE_ADMINQ_COMMAND_ERROR_UNAVAILABLE = 0xFFFFFFFD,
GVE_ADMINQ_COMMAND_ERROR_UNIMPLEMENTED = 0xFFFFFFFE,
GVE_ADMINQ_COMMAND_ERROR_UNKNOWN_ERROR = 0xFFFFFFFF,
};
#define GVE_ADMINQ_DEVICE_DESCRIPTOR_VERSION 1
/* All AdminQ command structs should be naturally packed.
* GVE_CHECK_STRUCT/UNION_LEN will check struct/union length and throw
* error at compile time when the size is not correct.
*/
struct gve_adminq_describe_device {
__be64 device_descriptor_addr;
__be32 device_descriptor_version;
__be32 available_length;
};
GVE_CHECK_STRUCT_LEN(16, gve_adminq_describe_device);
struct gve_device_descriptor {
__be64 max_registered_pages;
__be16 reserved1;
__be16 tx_queue_entries;
__be16 rx_queue_entries;
__be16 default_num_queues;
__be16 mtu;
__be16 counters;
__be16 tx_pages_per_qpl;
__be16 rx_pages_per_qpl;
u8 mac[ETH_ALEN];
__be16 num_device_options;
__be16 total_length;
u8 reserved2[6];
};
GVE_CHECK_STRUCT_LEN(40, gve_device_descriptor);
struct gve_device_option {
__be16 option_id;
__be16 option_length;
__be32 required_features_mask;
};
GVE_CHECK_STRUCT_LEN(8, gve_device_option);
struct gve_device_option_gqi_rda {
__be32 supported_features_mask;
};
GVE_CHECK_STRUCT_LEN(4, gve_device_option_gqi_rda);
struct gve_device_option_gqi_qpl {
__be32 supported_features_mask;
};
GVE_CHECK_STRUCT_LEN(4, gve_device_option_gqi_qpl);
struct gve_device_option_dqo_rda {
__be32 supported_features_mask;
__be16 tx_comp_ring_entries;
__be16 rx_buff_ring_entries;
};
GVE_CHECK_STRUCT_LEN(8, gve_device_option_dqo_rda);
struct gve_device_option_jumbo_frames {
__be32 supported_features_mask;
__be16 max_mtu;
u8 padding[2];
};
GVE_CHECK_STRUCT_LEN(8, gve_device_option_jumbo_frames);
/* Terminology:
*
* RDA - Raw DMA Addressing - Buffers associated with SKBs are directly DMA
* mapped and read/updated by the device.
*
* QPL - Queue Page Lists - Driver uses bounce buffers which are DMA mapped with
* the device for read/write and data is copied from/to SKBs.
*/
enum gve_dev_opt_id {
GVE_DEV_OPT_ID_GQI_RAW_ADDRESSING = 0x1,
GVE_DEV_OPT_ID_GQI_RDA = 0x2,
GVE_DEV_OPT_ID_GQI_QPL = 0x3,
GVE_DEV_OPT_ID_DQO_RDA = 0x4,
GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8,
};
enum gve_dev_opt_req_feat_mask {
GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RAW_ADDRESSING = 0x0,
GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA = 0x0,
GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0,
GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0,
GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0,
};
enum gve_sup_feature_mask {
GVE_SUP_JUMBO_FRAMES_MASK = 1 << 2,
};
#define GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING 0x0
struct gve_adminq_configure_device_resources {
__be64 counter_array;
__be64 irq_db_addr;
__be32 num_counters;
__be32 num_irq_dbs;
__be32 irq_db_stride;
__be32 ntfy_blk_msix_base_idx;
u8 queue_format;
u8 padding[7];
};
GVE_CHECK_STRUCT_LEN(40, gve_adminq_configure_device_resources);
struct gve_adminq_register_page_list {
__be32 page_list_id;
__be32 num_pages;
__be64 page_address_list_addr;
};
GVE_CHECK_STRUCT_LEN(16, gve_adminq_register_page_list);
struct gve_adminq_unregister_page_list {
__be32 page_list_id;
};
GVE_CHECK_STRUCT_LEN(4, gve_adminq_unregister_page_list);
#define GVE_RAW_ADDRESSING_QPL_ID 0xFFFFFFFF
struct gve_adminq_create_tx_queue {
__be32 queue_id;
__be32 reserved;
__be64 queue_resources_addr;
__be64 tx_ring_addr;
__be32 queue_page_list_id;
__be32 ntfy_id;
__be64 tx_comp_ring_addr;
__be16 tx_ring_size;
__be16 tx_comp_ring_size;
u8 padding[4];
};
GVE_CHECK_STRUCT_LEN(48, gve_adminq_create_tx_queue);
struct gve_adminq_create_rx_queue {
__be32 queue_id;
__be32 index;
__be32 reserved;
__be32 ntfy_id;
__be64 queue_resources_addr;
__be64 rx_desc_ring_addr;
__be64 rx_data_ring_addr;
__be32 queue_page_list_id;
__be16 rx_ring_size;
__be16 packet_buffer_size;
__be16 rx_buff_ring_size;
u8 enable_rsc;
u8 padding[5];
};
GVE_CHECK_STRUCT_LEN(56, gve_adminq_create_rx_queue);
/* Queue resources that are shared with the device */
struct gve_queue_resources {
union {
struct {
__be32 db_index; /* Device -> Guest */
__be32 counter_index; /* Device -> Guest */
};
u8 reserved[64];
};
};
GVE_CHECK_STRUCT_LEN(64, gve_queue_resources);
struct gve_adminq_destroy_tx_queue {
__be32 queue_id;
};
GVE_CHECK_STRUCT_LEN(4, gve_adminq_destroy_tx_queue);
struct gve_adminq_destroy_rx_queue {
__be32 queue_id;
};
GVE_CHECK_STRUCT_LEN(4, gve_adminq_destroy_rx_queue);
/* GVE Set Driver Parameter Types */
enum gve_set_driver_param_types {
GVE_SET_PARAM_MTU = 0x1,
};
struct gve_adminq_set_driver_parameter {
__be32 parameter_type;
u8 reserved[4];
__be64 parameter_value;
};
GVE_CHECK_STRUCT_LEN(16, gve_adminq_set_driver_parameter);
struct gve_adminq_report_stats {
__be64 stats_report_len;
__be64 stats_report_addr;
__be64 interval;
};
GVE_CHECK_STRUCT_LEN(24, gve_adminq_report_stats);
struct gve_adminq_report_link_speed {
__be64 link_speed_address;
};
GVE_CHECK_STRUCT_LEN(8, gve_adminq_report_link_speed);
struct stats {
__be32 stat_name;
__be32 queue_id;
__be64 value;
};
GVE_CHECK_STRUCT_LEN(16, stats);
struct gve_stats_report {
__be64 written_count;
struct stats stats[];
};
GVE_CHECK_STRUCT_LEN(8, gve_stats_report);
enum gve_stat_names {
/* stats from gve */
TX_WAKE_CNT = 1,
TX_STOP_CNT = 2,
TX_FRAMES_SENT = 3,
TX_BYTES_SENT = 4,
TX_LAST_COMPLETION_PROCESSED = 5,
RX_NEXT_EXPECTED_SEQUENCE = 6,
RX_BUFFERS_POSTED = 7,
TX_TIMEOUT_CNT = 8,
/* stats from NIC */
RX_QUEUE_DROP_CNT = 65,
RX_NO_BUFFERS_POSTED = 66,
RX_DROPS_PACKET_OVER_MRU = 67,
RX_DROPS_INVALID_CHECKSUM = 68,
};
enum gve_l3_type {
/* Must be zero so zero initialized LUT is unknown. */
GVE_L3_TYPE_UNKNOWN = 0,
GVE_L3_TYPE_OTHER,
GVE_L3_TYPE_IPV4,
GVE_L3_TYPE_IPV6,
};
enum gve_l4_type {
/* Must be zero so zero initialized LUT is unknown. */
GVE_L4_TYPE_UNKNOWN = 0,
GVE_L4_TYPE_OTHER,
GVE_L4_TYPE_TCP,
GVE_L4_TYPE_UDP,
GVE_L4_TYPE_ICMP,
GVE_L4_TYPE_SCTP,
};
/* These are control path types for PTYPE which are the same as the data path
* types.
*/
struct gve_ptype_entry {
u8 l3_type;
u8 l4_type;
};
struct gve_ptype_map {
struct gve_ptype_entry ptypes[1 << 10]; /* PTYPES are always 10 bits. */
};
struct gve_adminq_get_ptype_map {
__be64 ptype_map_len;
__be64 ptype_map_addr;
};
union gve_adminq_command {
struct {
__be32 opcode;
__be32 status;
union {
struct gve_adminq_configure_device_resources
configure_device_resources;
struct gve_adminq_create_tx_queue create_tx_queue;
struct gve_adminq_create_rx_queue create_rx_queue;
struct gve_adminq_destroy_tx_queue destroy_tx_queue;
struct gve_adminq_destroy_rx_queue destroy_rx_queue;
struct gve_adminq_describe_device describe_device;
struct gve_adminq_register_page_list reg_page_list;
struct gve_adminq_unregister_page_list unreg_page_list;
struct gve_adminq_set_driver_parameter set_driver_param;
struct gve_adminq_report_stats report_stats;
struct gve_adminq_report_link_speed report_link_speed;
struct gve_adminq_get_ptype_map get_ptype_map;
};
};
u8 reserved[64];
};
GVE_CHECK_UNION_LEN(64, gve_adminq_command);
int gve_adminq_alloc(struct gve_priv *priv);
void gve_adminq_free(struct gve_priv *priv);
void gve_adminq_release(struct gve_priv *priv);
int gve_adminq_describe_device(struct gve_priv *priv);
int gve_adminq_configure_device_resources(struct gve_priv *priv,
dma_addr_t counter_array_bus_addr,
u32 num_counters,
dma_addr_t db_array_bus_addr,
u32 num_ntfy_blks);
int gve_adminq_deconfigure_device_resources(struct gve_priv *priv);
int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 num_queues);
int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 num_queues);
int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues);
int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues);
int gve_adminq_register_page_list(struct gve_priv *priv,
struct gve_queue_page_list *qpl);
int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id);
int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu);
int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len,
dma_addr_t stats_report_addr, u64 interval);
int gve_adminq_report_link_speed(struct gve_priv *priv);
struct gve_ptype_lut;
int gve_adminq_get_ptype_map_dqo(struct gve_priv *priv,
struct gve_ptype_lut *ptype_lut);
#endif /* _GVE_ADMINQ_H */
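
The GVE_CHECK_STRUCT_LEN()/GVE_CHECK_UNION_LEN() macros used throughout this header are expected to come from an OS-adaptation header that is not part of this import. A minimal sketch of equivalent compile-time size checks (the *_EXAMPLE names are hypothetical):

/* Hypothetical equivalents: fail the build if a wire-format struct/union is
 * not exactly the size the device expects.
 */
#include <assert.h>	/* C11 static_assert */
#define GVE_CHECK_STRUCT_LEN_EXAMPLE(bytes, name) \
	static_assert(sizeof(struct name) == (bytes), #name " has the wrong size")
#define GVE_CHECK_UNION_LEN_EXAMPLE(bytes, name) \
	static_assert(sizeof(union name) == (bytes), #name " has the wrong size")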

gve_desc.h (new file)
@@ -0,0 +1,136 @@
/* SPDX-License-Identifier: MIT
* Google Virtual Ethernet (gve) driver
* Copyright (C) 2015-2022 Google, Inc.
*/
/* GVE Transmit Descriptor formats */
#ifndef _GVE_DESC_H_
#define _GVE_DESC_H_
/* A note on seg_addrs
*
* Base addresses encoded in seg_addr are not assumed to be physical
* addresses. The ring format assumes these come from some linear address
* space. This could be physical memory, kernel virtual memory, user virtual
* memory.
* If raw dma addressing is not supported then gVNIC uses lists of registered
* pages. Each queue is assumed to be associated with a single such linear
* address space to ensure a consistent meaning for seg_addrs posted to its
* rings.
*/
struct gve_tx_pkt_desc {
u8 type_flags; /* desc type is lower 4 bits, flags upper */
u8 l4_csum_offset; /* relative offset of L4 csum word */
u8 l4_hdr_offset; /* Offset of start of L4 headers in packet */
u8 desc_cnt; /* Total descriptors for this packet */
__be16 len; /* Total length of this packet (in bytes) */
__be16 seg_len; /* Length of this descriptor's segment */
__be64 seg_addr; /* Base address (see note) of this segment */
} __packed;
struct gve_tx_mtd_desc {
u8 type_flags; /* type is lower 4 bits, subtype upper */
u8 path_state; /* state is lower 4 bits, hash type upper */
__be16 reserved0;
__be32 path_hash;
__be64 reserved1;
} __packed;
struct gve_tx_seg_desc {
u8 type_flags; /* type is lower 4 bits, flags upper */
u8 l3_offset; /* TSO: 2 byte units to start of IPH */
__be16 reserved;
__be16 mss; /* TSO MSS */
__be16 seg_len;
__be64 seg_addr;
} __packed;
/* GVE Transmit Descriptor Types */
#define GVE_TXD_STD (0x0 << 4) /* Std with Host Address */
#define GVE_TXD_TSO (0x1 << 4) /* TSO with Host Address */
#define GVE_TXD_SEG (0x2 << 4) /* Seg with Host Address */
#define GVE_TXD_MTD (0x3 << 4) /* Metadata */
/* GVE Transmit Descriptor Flags for Std Pkts */
#define GVE_TXF_L4CSUM BIT(0) /* Need csum offload */
#define GVE_TXF_TSTAMP BIT(2) /* Timestamp required */
/* GVE Transmit Descriptor Flags for TSO Segs */
#define GVE_TXSF_IPV6 BIT(1) /* IPv6 TSO */
/* GVE Transmit Descriptor Options for MTD Segs */
#define GVE_MTD_SUBTYPE_PATH 0
#define GVE_MTD_PATH_STATE_DEFAULT 0
#define GVE_MTD_PATH_STATE_TIMEOUT 1
#define GVE_MTD_PATH_STATE_CONGESTION 2
#define GVE_MTD_PATH_STATE_RETRANSMIT 3
#define GVE_MTD_PATH_HASH_NONE (0x0 << 4)
#define GVE_MTD_PATH_HASH_L4 (0x1 << 4)
/* GVE Receive Packet Descriptor */
/* The start of an ethernet packet comes 2 bytes into the rx buffer.
* gVNIC adds this padding so that both the DMA and the L3/4 protocol header
* access is aligned.
*/
#define GVE_RX_PAD 2
struct gve_rx_desc {
u8 padding[48];
__be32 rss_hash; /* Receive-side scaling hash (Toeplitz for gVNIC) */
__be16 mss;
__be16 reserved; /* Reserved to zero */
u8 hdr_len; /* Header length (L2-L4) including padding */
u8 hdr_off; /* 64-byte-scaled offset into RX_DATA entry */
__sum16 csum; /* 1's-complement partial checksum of L3+ bytes */
__be16 len; /* Length of the received packet */
__be16 flags_seq; /* Flags [15:3] and sequence number [2:0] (1-7) */
} __packed;
GVE_CHECK_STRUCT_LEN(64, gve_rx_desc);
/* If the device supports raw dma addressing then the addr in data slot is
* the dma address of the buffer.
* If the device only supports registered segments then the addr is a byte
* offset into the registered segment (an ordered list of pages) where the
* buffer is.
*/
union gve_rx_data_slot {
__be64 qpl_offset;
__be64 addr;
};
/* GVE Receive Packet Descriptor Seq No */
#define GVE_SEQNO(x) (be16_to_cpu(x) & 0x7)
/* GVE Receive Packet Descriptor Flags */
#define GVE_RXFLG(x) cpu_to_be16(1 << (3 + (x)))
#define GVE_RXF_FRAG GVE_RXFLG(3) /* IP Fragment */
#define GVE_RXF_IPV4 GVE_RXFLG(4) /* IPv4 */
#define GVE_RXF_IPV6 GVE_RXFLG(5) /* IPv6 */
#define GVE_RXF_TCP GVE_RXFLG(6) /* TCP Packet */
#define GVE_RXF_UDP GVE_RXFLG(7) /* UDP Packet */
#define GVE_RXF_ERR GVE_RXFLG(8) /* Packet Error Detected */
#define GVE_RXF_PKT_CONT GVE_RXFLG(10) /* Multi Fragment RX packet */
/* GVE IRQ */
#define GVE_IRQ_ACK BIT(31)
#define GVE_IRQ_MASK BIT(30)
#define GVE_IRQ_EVENT BIT(29)
static inline bool gve_needs_rss(__be16 flag)
{
if (flag & GVE_RXF_FRAG)
return false;
if (flag & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
return true;
return false;
}
static inline u8 gve_next_seqno(u8 seq)
{
return (seq + 1) == 8 ? 1 : seq + 1;
}
#endif /* _GVE_DESC_H_ */
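
As an illustration of the 3-bit sequence-number scheme above (a sketch, not part of the imported file; the _example name is hypothetical): a GQI RX descriptor is treated as ready once its sequence number matches the ring's expected value, which then advances through 1..7 via gve_next_seqno().

/* Hypothetical RX-side check built on GVE_SEQNO(). */
static inline bool
gve_rx_desc_ready_example(const struct gve_rx_desc *desc, u8 expected_seqno)
{
	return GVE_SEQNO(desc->flags_seq) == expected_seqno;
}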

gve_desc_dqo.h (new file)
@@ -0,0 +1,253 @@
/* SPDX-License-Identifier: MIT
* Google Virtual Ethernet (gve) driver
* Copyright (C) 2015-2022 Google, Inc.
*/
/* GVE DQO Descriptor formats */
#ifndef _GVE_DESC_DQO_H_
#define _GVE_DESC_DQO_H_
#define GVE_TX_MAX_HDR_SIZE_DQO 255
#define GVE_TX_MIN_TSO_MSS_DQO 88
#ifndef __LITTLE_ENDIAN_BITFIELD
#error "Only little endian supported"
#endif
/* Basic TX descriptor (DTYPE 0x0C) */
struct gve_tx_pkt_desc_dqo {
__le64 buf_addr;
/* Must be GVE_TX_PKT_DESC_DTYPE_DQO (0xc) */
u8 dtype: 5;
/* Denotes the last descriptor of a packet. */
u8 end_of_packet: 1;
u8 checksum_offload_enable: 1;
/* If set, will generate a descriptor completion for this descriptor. */
u8 report_event: 1;
u8 reserved0;
__le16 reserved1;
/* The TX completion associated with this packet will contain this tag.
*/
__le16 compl_tag;
u16 buf_size: 14;
u16 reserved2: 2;
} __packed;
GVE_CHECK_STRUCT_LEN(16, gve_tx_pkt_desc_dqo);
#define GVE_TX_PKT_DESC_DTYPE_DQO 0xc
#define GVE_TX_MAX_BUF_SIZE_DQO ((16 * 1024) - 1)
/* Maximum number of data descriptors allowed per packet, or per-TSO segment. */
#define GVE_TX_MAX_DATA_DESCS 10
/* Min gap between tail and head to avoid cacheline overlap */
#define GVE_TX_MIN_DESC_PREVENT_CACHE_OVERLAP 4
/* "report_event" on TX packet descriptors may only be reported on the last
* descriptor of a TX packet, and they must be spaced apart with at least this
* value.
*/
#define GVE_TX_MIN_RE_INTERVAL 32
struct gve_tx_context_cmd_dtype {
u8 dtype: 5;
u8 tso: 1;
u8 reserved1: 2;
u8 reserved2;
};
GVE_CHECK_STRUCT_LEN(2, gve_tx_context_cmd_dtype);
/* TX Native TSO Context DTYPE (0x05)
*
* "flex" fields allow the driver to send additional packet context to HW.
*/
struct gve_tx_tso_context_desc_dqo {
/* The L4 payload bytes that should be segmented. */
u32 tso_total_len: 24;
u32 flex10: 8;
/* Max segment size in TSO excluding headers. */
u16 mss: 14;
u16 reserved: 2;
u8 header_len; /* Header length to use for TSO offload */
u8 flex11;
struct gve_tx_context_cmd_dtype cmd_dtype;
u8 flex0;
u8 flex5;
u8 flex6;
u8 flex7;
u8 flex8;
u8 flex9;
} __packed;
GVE_CHECK_STRUCT_LEN(16, gve_tx_tso_context_desc_dqo);
#define GVE_TX_TSO_CTX_DESC_DTYPE_DQO 0x5
/* General context descriptor for sending metadata. */
struct gve_tx_general_context_desc_dqo {
u8 flex4;
u8 flex5;
u8 flex6;
u8 flex7;
u8 flex8;
u8 flex9;
u8 flex10;
u8 flex11;
struct gve_tx_context_cmd_dtype cmd_dtype;
u16 reserved;
u8 flex0;
u8 flex1;
u8 flex2;
u8 flex3;
} __packed;
GVE_CHECK_STRUCT_LEN(16, gve_tx_general_context_desc_dqo);
#define GVE_TX_GENERAL_CTX_DESC_DTYPE_DQO 0x4
/* Logical structure of metadata which is packed into context descriptor flex
* fields.
*/
struct gve_tx_metadata_dqo {
union {
struct {
u8 version;
/* If `skb->l4_hash` is set, this value should be
* derived from `skb->hash`.
*
* A zero value means no l4_hash was associated with the
* skb.
*/
u16 path_hash: 15;
/* Should be set to 1 if the flow associated with the
* skb had a rehash from the TCP stack.
*/
u16 rehash_event: 1;
} __packed;
u8 bytes[12];
};
} __packed;
GVE_CHECK_STRUCT_LEN(12, gve_tx_metadata_dqo);
#define GVE_TX_METADATA_VERSION_DQO 0
/* TX completion descriptor */
struct gve_tx_compl_desc {
/* For types 0-4 this is the TX queue ID associated with this
* completion.
*/
u16 id: 11;
/* See: GVE_COMPL_TYPE_DQO* */
u16 type: 3;
u16 reserved0: 1;
/* Flipped by HW to notify the descriptor is populated. */
u16 generation: 1;
union {
/* For descriptor completions, this is the last index fetched
* by HW + 1.
*/
__le16 tx_head;
/* For packet completions, this is the completion tag set on the
* TX packet descriptors.
*/
__le16 completion_tag;
};
__le32 reserved1;
} __packed;
GVE_CHECK_STRUCT_LEN(8, gve_tx_compl_desc);
#define GVE_COMPL_TYPE_DQO_PKT 0x2 /* Packet completion */
#define GVE_COMPL_TYPE_DQO_DESC 0x4 /* Descriptor completion */
#define GVE_COMPL_TYPE_DQO_MISS 0x1 /* Miss path completion */
#define GVE_COMPL_TYPE_DQO_REINJECTION 0x3 /* Re-injection completion */
/* Descriptor to post buffers to HW on buffer queue. */
struct gve_rx_desc_dqo {
__le16 buf_id; /* ID returned in Rx completion descriptor */
__le16 reserved0;
__le32 reserved1;
__le64 buf_addr; /* DMA address of the buffer */
__le64 header_buf_addr;
__le64 reserved2;
} __packed;
GVE_CHECK_STRUCT_LEN(32, gve_rx_desc_dqo);
/* Descriptor for HW to notify SW of new packets received on RX queue. */
struct gve_rx_compl_desc_dqo {
/* Must be 1 */
u8 rxdid: 4;
u8 reserved0: 4;
/* Packet originated from this system rather than the network. */
u8 loopback: 1;
/* Set when IPv6 packet contains a destination options header or routing
* header.
*/
u8 ipv6_ex_add: 1;
/* Invalid packet was received. */
u8 rx_error: 1;
u8 reserved1: 5;
u16 packet_type: 10;
u16 ip_hdr_err: 1;
u16 udp_len_err: 1;
u16 raw_cs_invalid: 1;
u16 reserved2: 3;
u16 packet_len: 14;
/* Flipped by HW to notify the descriptor is populated. */
u16 generation: 1;
/* Should be zero. */
u16 buffer_queue_id: 1;
u16 header_len: 10;
u16 rsc: 1;
u16 split_header: 1;
u16 reserved3: 4;
u8 descriptor_done: 1;
u8 end_of_packet: 1;
u8 header_buffer_overflow: 1;
u8 l3_l4_processed: 1;
u8 csum_ip_err: 1;
u8 csum_l4_err: 1;
u8 csum_external_ip_err: 1;
u8 csum_external_udp_err: 1;
u8 status_error1;
__le16 reserved5;
__le16 buf_id; /* Buffer ID which was sent on the buffer queue. */
union {
/* Packet checksum. */
__le16 raw_cs;
/* Segment length for RSC packets. */
__le16 rsc_seg_len;
};
__le32 hash;
__le32 reserved6;
__le64 reserved7;
} __packed;
GVE_CHECK_STRUCT_LEN(32, gve_rx_compl_desc_dqo);
/* Ringing the doorbell too often can hurt performance.
*
* HW requires this value to be at least 8.
*/
#define GVE_RX_BUF_THRESH_DQO 32
#endif /* _GVE_DESC_DQO_H_ */
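
For the DQO completion rings above, the "generation" bit is how hardware signals a newly written entry ("Flipped by HW to notify the descriptor is populated"). A minimal sketch of how software might test it, with a hypothetical _example name:

/* Hypothetical check: an entry is new while its generation bit matches the
 * value software currently expects; that expected value is toggled each time
 * the completion ring wraps.
 */
static inline bool
gve_tx_compl_pending_example(const struct gve_tx_compl_desc *desc, u8 cur_gen)
{
	return desc->generation == cur_gen;
}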

gve_register.h (new file)
@@ -0,0 +1,27 @@
/* SPDX-License-Identifier: MIT
* Google Virtual Ethernet (gve) driver
* Copyright (C) 2015-2022 Google, Inc.
*/
#ifndef _GVE_REGISTER_H_
#define _GVE_REGISTER_H_
/* Fixed Configuration Registers */
struct gve_registers {
__be32 device_status;
__be32 driver_status;
__be32 max_tx_queues;
__be32 max_rx_queues;
__be32 adminq_pfn;
__be32 adminq_doorbell;
__be32 adminq_event_counter;
u8 reserved[3];
u8 driver_version;
};
enum gve_device_status_flags {
GVE_DEVICE_STATUS_RESET_MASK = BIT(1),
GVE_DEVICE_STATUS_LINK_STATUS_MASK = BIT(2),
GVE_DEVICE_STATUS_REPORT_STATS_MASK = BIT(3),
};
#endif /* _GVE_REGISTER_H_ */
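
These registers live in BAR 0 (GVE_REG_BAR in gve.h) and are accessed big-endian, as gve_adminq.c does with ioread32be()/iowrite32be(). A small sketch of testing the link bit, assuming the same osdep accessors; the helper name is hypothetical:

/* Hypothetical helper: read device_status and test the link-status bit. */
static inline bool
gve_link_is_up_example(struct gve_registers *reg_bar)
{
	return !!(ioread32be(&reg_bar->device_status) &
		  GVE_DEVICE_STATUS_LINK_STATUS_MASK);
}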