numam-dpdk/drivers/net/thunderx/nicvf_ethdev.c

2236 lines
59 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2016 Cavium, Inc
*/
#include <assert.h>
#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <stdarg.h>
#include <inttypes.h>
#include <netinet/in.h>
#include <sys/queue.h>
#include <rte_alarm.h>
#include <rte_branch_prediction.h>
#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_debug.h>
#include <rte_dev.h>
#include <rte_eal.h>
#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_pci.h>
#include <rte_interrupts.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_random.h>
#include <rte_pci.h>
#include <rte_bus_pci.h>
#include <rte_tailq.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include "base/nicvf_plat.h"
#include "nicvf_ethdev.h"
#include "nicvf_rxtx.h"
#include "nicvf_svf.h"
#include "nicvf_logs.h"
int nicvf_logtype_mbox;
int nicvf_logtype_init;
int nicvf_logtype_driver;
static void nicvf_dev_stop(struct rte_eth_dev *dev);
static void nicvf_dev_stop_cleanup(struct rte_eth_dev *dev, bool cleanup);
static void nicvf_vf_stop(struct rte_eth_dev *dev, struct nicvf *nic,
bool cleanup);
RTE_INIT(nicvf_init_log);
static void
nicvf_init_log(void)
{
nicvf_logtype_mbox = rte_log_register("pmd.net.thunderx.mbox");
if (nicvf_logtype_mbox >= 0)
rte_log_set_level(nicvf_logtype_mbox, RTE_LOG_NOTICE);
nicvf_logtype_init = rte_log_register("pmd.net.thunderx.init");
if (nicvf_logtype_init >= 0)
rte_log_set_level(nicvf_logtype_init, RTE_LOG_NOTICE);
nicvf_logtype_driver = rte_log_register("pmd.net.thunderx.driver");
if (nicvf_logtype_driver >= 0)
rte_log_set_level(nicvf_logtype_driver, RTE_LOG_NOTICE);
}
static void
nicvf_link_status_update(struct nicvf *nic,
struct rte_eth_link *link)
{
memset(link, 0, sizeof(*link));
link->link_status = nic->link_up ? ETH_LINK_UP : ETH_LINK_DOWN;
if (nic->duplex == NICVF_HALF_DUPLEX)
link->link_duplex = ETH_LINK_HALF_DUPLEX;
else if (nic->duplex == NICVF_FULL_DUPLEX)
link->link_duplex = ETH_LINK_FULL_DUPLEX;
link->link_speed = nic->speed;
link->link_autoneg = ETH_LINK_AUTONEG;
}
static void
nicvf_interrupt(void *arg)
{
struct rte_eth_dev *dev = arg;
struct nicvf *nic = nicvf_pmd_priv(dev);
struct rte_eth_link link;
if (nicvf_reg_poll_interrupts(nic) == NIC_MBOX_MSG_BGX_LINK_CHANGE) {
if (dev->data->dev_conf.intr_conf.lsc) {
nicvf_link_status_update(nic, &link);
rte_eth_linkstatus_set(dev, &link);
_rte_eth_dev_callback_process(dev,
RTE_ETH_EVENT_INTR_LSC,
NULL);
}
}
rte_eal_alarm_set(NICVF_INTR_POLL_INTERVAL_MS * 1000,
nicvf_interrupt, dev);
}
static void
nicvf_vf_interrupt(void *arg)
{
struct nicvf *nic = arg;
nicvf_reg_poll_interrupts(nic);
rte_eal_alarm_set(NICVF_INTR_POLL_INTERVAL_MS * 1000,
nicvf_vf_interrupt, nic);
}
static int
nicvf_periodic_alarm_start(void (fn)(void *), void *arg)
{
return rte_eal_alarm_set(NICVF_INTR_POLL_INTERVAL_MS * 1000, fn, arg);
}
static int
nicvf_periodic_alarm_stop(void (fn)(void *), void *arg)
{
return rte_eal_alarm_cancel(fn, arg);
}
/*
* Return 0 means link status changed, -1 means not changed
*/
static int
nicvf_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete)
{
#define CHECK_INTERVAL 100 /* 100ms */
#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
struct rte_eth_link link;
struct nicvf *nic = nicvf_pmd_priv(dev);
int i;
PMD_INIT_FUNC_TRACE();
if (wait_to_complete) {
/* rte_eth_link_get() might need to wait up to 9 seconds */
for (i = 0; i < MAX_CHECK_TIME; i++) {
nicvf_link_status_update(nic, &link);
if (link.link_status == ETH_LINK_UP)
break;
rte_delay_ms(CHECK_INTERVAL);
}
} else {
nicvf_link_status_update(nic, &link);
}
return rte_eth_linkstatus_set(dev, &link);
}
static int
nicvf_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
{
struct nicvf *nic = nicvf_pmd_priv(dev);
uint32_t buffsz, frame_size = mtu + NIC_HW_L2_OVERHEAD;
size_t i;
struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
PMD_INIT_FUNC_TRACE();
if (frame_size > NIC_HW_MAX_FRS)
return -EINVAL;
if (frame_size < NIC_HW_MIN_FRS)
return -EINVAL;
buffsz = dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM;
/*
* Refuse mtu that requires the support of scattered packets
* when this feature has not been enabled before.
*/
if (dev->data->dev_started && !dev->data->scattered_rx &&
(frame_size + 2 * VLAN_TAG_SIZE > buffsz))
return -EINVAL;
/* check <seg size> * <max_seg> >= max_frame */
if (dev->data->scattered_rx &&
(frame_size + 2 * VLAN_TAG_SIZE > buffsz * NIC_HW_MAX_SEGS))
return -EINVAL;
if (frame_size > ETHER_MAX_LEN)
rxmode->offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
else
rxmode->offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;
if (nicvf_mbox_update_hw_max_frs(nic, mtu))
return -EINVAL;
/* Update max_rx_pkt_len */
rxmode->max_rx_pkt_len = mtu + ETHER_HDR_LEN;
nic->mtu = mtu;
for (i = 0; i < nic->sqs_count; i++)
nic->snicvf[i]->mtu = mtu;
return 0;
}
static int
nicvf_dev_get_regs(struct rte_eth_dev *dev, struct rte_dev_reg_info *regs)
{
uint64_t *data = regs->data;
struct nicvf *nic = nicvf_pmd_priv(dev);
if (data == NULL) {
regs->length = nicvf_reg_get_count();
regs->width = THUNDERX_REG_BYTES;
return 0;
}
/* Support only full register dump */
if ((regs->length == 0) ||
(regs->length == (uint32_t)nicvf_reg_get_count())) {
regs->version = nic->vendor_id << 16 | nic->device_id;
nicvf_reg_dump(nic, data);
return 0;
}
return -ENOTSUP;
}
static int
nicvf_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
uint16_t qidx;
struct nicvf_hw_rx_qstats rx_qstats;
struct nicvf_hw_tx_qstats tx_qstats;
struct nicvf_hw_stats port_stats;
struct nicvf *nic = nicvf_pmd_priv(dev);
uint16_t rx_start, rx_end;
uint16_t tx_start, tx_end;
size_t i;
/* RX queue indices for the first VF */
nicvf_rx_range(dev, nic, &rx_start, &rx_end);
/* Reading per RX ring stats */
for (qidx = rx_start; qidx <= rx_end; qidx++) {
if (qidx >= RTE_ETHDEV_QUEUE_STAT_CNTRS)
break;
nicvf_hw_get_rx_qstats(nic, &rx_qstats, qidx);
stats->q_ibytes[qidx] = rx_qstats.q_rx_bytes;
stats->q_ipackets[qidx] = rx_qstats.q_rx_packets;
}
/* TX queue indices for the first VF */
nicvf_tx_range(dev, nic, &tx_start, &tx_end);
/* Reading per TX ring stats */
for (qidx = tx_start; qidx <= tx_end; qidx++) {
if (qidx >= RTE_ETHDEV_QUEUE_STAT_CNTRS)
break;
nicvf_hw_get_tx_qstats(nic, &tx_qstats, qidx);
stats->q_obytes[qidx] = tx_qstats.q_tx_bytes;
stats->q_opackets[qidx] = tx_qstats.q_tx_packets;
}
for (i = 0; i < nic->sqs_count; i++) {
struct nicvf *snic = nic->snicvf[i];
if (snic == NULL)
break;
/* RX queue indices for a secondary VF */
nicvf_rx_range(dev, snic, &rx_start, &rx_end);
/* Reading per RX ring stats */
for (qidx = rx_start; qidx <= rx_end; qidx++) {
if (qidx >= RTE_ETHDEV_QUEUE_STAT_CNTRS)
break;
nicvf_hw_get_rx_qstats(snic, &rx_qstats,
qidx % MAX_RCV_QUEUES_PER_QS);
stats->q_ibytes[qidx] = rx_qstats.q_rx_bytes;
stats->q_ipackets[qidx] = rx_qstats.q_rx_packets;
}
/* TX queue indices for a secondary VF */
nicvf_tx_range(dev, snic, &tx_start, &tx_end);
/* Reading per TX ring stats */
for (qidx = tx_start; qidx <= tx_end; qidx++) {
if (qidx >= RTE_ETHDEV_QUEUE_STAT_CNTRS)
break;
nicvf_hw_get_tx_qstats(snic, &tx_qstats,
qidx % MAX_SND_QUEUES_PER_QS);
stats->q_obytes[qidx] = tx_qstats.q_tx_bytes;
stats->q_opackets[qidx] = tx_qstats.q_tx_packets;
}
}
nicvf_hw_get_stats(nic, &port_stats);
stats->ibytes = port_stats.rx_bytes;
stats->ipackets = port_stats.rx_ucast_frames;
stats->ipackets += port_stats.rx_bcast_frames;
stats->ipackets += port_stats.rx_mcast_frames;
stats->ierrors = port_stats.rx_l2_errors;
stats->imissed = port_stats.rx_drop_red;
stats->imissed += port_stats.rx_drop_overrun;
stats->imissed += port_stats.rx_drop_bcast;
stats->imissed += port_stats.rx_drop_mcast;
stats->imissed += port_stats.rx_drop_l3_bcast;
stats->imissed += port_stats.rx_drop_l3_mcast;
stats->obytes = port_stats.tx_bytes_ok;
stats->opackets = port_stats.tx_ucast_frames_ok;
stats->opackets += port_stats.tx_bcast_frames_ok;
stats->opackets += port_stats.tx_mcast_frames_ok;
stats->oerrors = port_stats.tx_drops;
return 0;
}
static const uint32_t *
nicvf_dev_supported_ptypes_get(struct rte_eth_dev *dev)
{
size_t copied;
static uint32_t ptypes[32];
struct nicvf *nic = nicvf_pmd_priv(dev);
static const uint32_t ptypes_common[] = {
RTE_PTYPE_L3_IPV4,
RTE_PTYPE_L3_IPV4_EXT,
RTE_PTYPE_L3_IPV6,
RTE_PTYPE_L3_IPV6_EXT,
RTE_PTYPE_L4_TCP,
RTE_PTYPE_L4_UDP,
RTE_PTYPE_L4_FRAG,
};
static const uint32_t ptypes_tunnel[] = {
RTE_PTYPE_TUNNEL_GRE,
RTE_PTYPE_TUNNEL_GENEVE,
RTE_PTYPE_TUNNEL_VXLAN,
RTE_PTYPE_TUNNEL_NVGRE,
};
static const uint32_t ptypes_end = RTE_PTYPE_UNKNOWN;
copied = sizeof(ptypes_common);
memcpy(ptypes, ptypes_common, copied);
if (nicvf_hw_cap(nic) & NICVF_CAP_TUNNEL_PARSING) {
memcpy((char *)ptypes + copied, ptypes_tunnel,
sizeof(ptypes_tunnel));
copied += sizeof(ptypes_tunnel);
}
memcpy((char *)ptypes + copied, &ptypes_end, sizeof(ptypes_end));
if (dev->rx_pkt_burst == nicvf_recv_pkts ||
dev->rx_pkt_burst == nicvf_recv_pkts_multiseg)
return ptypes;
return NULL;
}
static void
nicvf_dev_stats_reset(struct rte_eth_dev *dev)
{
int i;
uint16_t rxqs = 0, txqs = 0;
struct nicvf *nic = nicvf_pmd_priv(dev);
uint16_t rx_start, rx_end;
uint16_t tx_start, tx_end;
/* Reset all primary nic counters */
nicvf_rx_range(dev, nic, &rx_start, &rx_end);
for (i = rx_start; i <= rx_end; i++)
rxqs |= (0x3 << (i * 2));
nicvf_tx_range(dev, nic, &tx_start, &tx_end);
for (i = tx_start; i <= tx_end; i++)
txqs |= (0x3 << (i * 2));
nicvf_mbox_reset_stat_counters(nic, 0x3FFF, 0x1F, rxqs, txqs);
/* Reset secondary nic queue counters */
for (i = 0; i < nic->sqs_count; i++) {
struct nicvf *snic = nic->snicvf[i];
if (snic == NULL)
break;
nicvf_rx_range(dev, snic, &rx_start, &rx_end);
for (i = rx_start; i <= rx_end; i++)
rxqs |= (0x3 << ((i % MAX_CMP_QUEUES_PER_QS) * 2));
nicvf_tx_range(dev, snic, &tx_start, &tx_end);
for (i = tx_start; i <= tx_end; i++)
txqs |= (0x3 << ((i % MAX_SND_QUEUES_PER_QS) * 2));
nicvf_mbox_reset_stat_counters(snic, 0, 0, rxqs, txqs);
}
}
/* Promiscuous mode enabled by default in LMAC to VF 1:1 map configuration */
static void
nicvf_dev_promisc_enable(struct rte_eth_dev *dev __rte_unused)
{
}
static inline uint64_t
nicvf_rss_ethdev_to_nic(struct nicvf *nic, uint64_t ethdev_rss)
{
uint64_t nic_rss = 0;
if (ethdev_rss & ETH_RSS_IPV4)
nic_rss |= RSS_IP_ENA;
if (ethdev_rss & ETH_RSS_IPV6)
nic_rss |= RSS_IP_ENA;
if (ethdev_rss & ETH_RSS_NONFRAG_IPV4_UDP)
nic_rss |= (RSS_IP_ENA | RSS_UDP_ENA);
if (ethdev_rss & ETH_RSS_NONFRAG_IPV4_TCP)
nic_rss |= (RSS_IP_ENA | RSS_TCP_ENA);
if (ethdev_rss & ETH_RSS_NONFRAG_IPV6_UDP)
nic_rss |= (RSS_IP_ENA | RSS_UDP_ENA);
if (ethdev_rss & ETH_RSS_NONFRAG_IPV6_TCP)
nic_rss |= (RSS_IP_ENA | RSS_TCP_ENA);
if (ethdev_rss & ETH_RSS_PORT)
nic_rss |= RSS_L2_EXTENDED_HASH_ENA;
if (nicvf_hw_cap(nic) & NICVF_CAP_TUNNEL_PARSING) {
if (ethdev_rss & ETH_RSS_VXLAN)
nic_rss |= RSS_TUN_VXLAN_ENA;
if (ethdev_rss & ETH_RSS_GENEVE)
nic_rss |= RSS_TUN_GENEVE_ENA;
if (ethdev_rss & ETH_RSS_NVGRE)
nic_rss |= RSS_TUN_NVGRE_ENA;
}
return nic_rss;
}
static inline uint64_t
nicvf_rss_nic_to_ethdev(struct nicvf *nic, uint64_t nic_rss)
{
uint64_t ethdev_rss = 0;
if (nic_rss & RSS_IP_ENA)
ethdev_rss |= (ETH_RSS_IPV4 | ETH_RSS_IPV6);
if ((nic_rss & RSS_IP_ENA) && (nic_rss & RSS_TCP_ENA))
ethdev_rss |= (ETH_RSS_NONFRAG_IPV4_TCP |
ETH_RSS_NONFRAG_IPV6_TCP);
if ((nic_rss & RSS_IP_ENA) && (nic_rss & RSS_UDP_ENA))
ethdev_rss |= (ETH_RSS_NONFRAG_IPV4_UDP |
ETH_RSS_NONFRAG_IPV6_UDP);
if (nic_rss & RSS_L2_EXTENDED_HASH_ENA)
ethdev_rss |= ETH_RSS_PORT;
if (nicvf_hw_cap(nic) & NICVF_CAP_TUNNEL_PARSING) {
if (nic_rss & RSS_TUN_VXLAN_ENA)
ethdev_rss |= ETH_RSS_VXLAN;
if (nic_rss & RSS_TUN_GENEVE_ENA)
ethdev_rss |= ETH_RSS_GENEVE;
if (nic_rss & RSS_TUN_NVGRE_ENA)
ethdev_rss |= ETH_RSS_NVGRE;
}
return ethdev_rss;
}
static int
nicvf_dev_reta_query(struct rte_eth_dev *dev,
struct rte_eth_rss_reta_entry64 *reta_conf,
uint16_t reta_size)
{
struct nicvf *nic = nicvf_pmd_priv(dev);
uint8_t tbl[NIC_MAX_RSS_IDR_TBL_SIZE];
int ret, i, j;
if (reta_size != NIC_MAX_RSS_IDR_TBL_SIZE) {
RTE_LOG(ERR, PMD, "The size of hash lookup table configured "
"(%d) doesn't match the number hardware can supported "
"(%d)", reta_size, NIC_MAX_RSS_IDR_TBL_SIZE);
return -EINVAL;
}
ret = nicvf_rss_reta_query(nic, tbl, NIC_MAX_RSS_IDR_TBL_SIZE);
if (ret)
return ret;
/* Copy RETA table */
for (i = 0; i < (NIC_MAX_RSS_IDR_TBL_SIZE / RTE_RETA_GROUP_SIZE); i++) {
for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
if ((reta_conf[i].mask >> j) & 0x01)
reta_conf[i].reta[j] = tbl[j];
}
return 0;
}
static int
nicvf_dev_reta_update(struct rte_eth_dev *dev,
struct rte_eth_rss_reta_entry64 *reta_conf,
uint16_t reta_size)
{
struct nicvf *nic = nicvf_pmd_priv(dev);
uint8_t tbl[NIC_MAX_RSS_IDR_TBL_SIZE];
int ret, i, j;
if (reta_size != NIC_MAX_RSS_IDR_TBL_SIZE) {
RTE_LOG(ERR, PMD, "The size of hash lookup table configured "
"(%d) doesn't match the number hardware can supported "
"(%d)", reta_size, NIC_MAX_RSS_IDR_TBL_SIZE);
return -EINVAL;
}
ret = nicvf_rss_reta_query(nic, tbl, NIC_MAX_RSS_IDR_TBL_SIZE);
if (ret)
return ret;
/* Copy RETA table */
for (i = 0; i < (NIC_MAX_RSS_IDR_TBL_SIZE / RTE_RETA_GROUP_SIZE); i++) {
for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
if ((reta_conf[i].mask >> j) & 0x01)
tbl[j] = reta_conf[i].reta[j];
}
return nicvf_rss_reta_update(nic, tbl, NIC_MAX_RSS_IDR_TBL_SIZE);
}
static int
nicvf_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
struct rte_eth_rss_conf *rss_conf)
{
struct nicvf *nic = nicvf_pmd_priv(dev);
if (rss_conf->rss_key)
nicvf_rss_get_key(nic, rss_conf->rss_key);
rss_conf->rss_key_len = RSS_HASH_KEY_BYTE_SIZE;
rss_conf->rss_hf = nicvf_rss_nic_to_ethdev(nic, nicvf_rss_get_cfg(nic));
return 0;
}
static int
nicvf_dev_rss_hash_update(struct rte_eth_dev *dev,
struct rte_eth_rss_conf *rss_conf)
{
struct nicvf *nic = nicvf_pmd_priv(dev);
uint64_t nic_rss;
if (rss_conf->rss_key &&
rss_conf->rss_key_len != RSS_HASH_KEY_BYTE_SIZE) {
RTE_LOG(ERR, PMD, "Hash key size mismatch %d",
rss_conf->rss_key_len);
return -EINVAL;
}
if (rss_conf->rss_key)
nicvf_rss_set_key(nic, rss_conf->rss_key);
nic_rss = nicvf_rss_ethdev_to_nic(nic, rss_conf->rss_hf);
nicvf_rss_set_cfg(nic, nic_rss);
return 0;
}
static int
nicvf_qset_cq_alloc(struct rte_eth_dev *dev, struct nicvf *nic,
struct nicvf_rxq *rxq, uint16_t qidx, uint32_t desc_cnt)
{
const struct rte_memzone *rz;
uint32_t ring_size = CMP_QUEUE_SZ_MAX * sizeof(union cq_entry_t);
rz = rte_eth_dma_zone_reserve(dev, "cq_ring",
nicvf_netdev_qidx(nic, qidx), ring_size,
NICVF_CQ_BASE_ALIGN_BYTES, nic->node);
if (rz == NULL) {
PMD_INIT_LOG(ERR, "Failed to allocate mem for cq hw ring");
return -ENOMEM;
}
memset(rz->addr, 0, ring_size);
rxq->phys = rz->iova;
rxq->desc = rz->addr;
rxq->qlen_mask = desc_cnt - 1;
return 0;
}
static int
nicvf_qset_sq_alloc(struct rte_eth_dev *dev, struct nicvf *nic,
struct nicvf_txq *sq, uint16_t qidx, uint32_t desc_cnt)
{
const struct rte_memzone *rz;
uint32_t ring_size = SND_QUEUE_SZ_MAX * sizeof(union sq_entry_t);
rz = rte_eth_dma_zone_reserve(dev, "sq",
nicvf_netdev_qidx(nic, qidx), ring_size,
NICVF_SQ_BASE_ALIGN_BYTES, nic->node);
if (rz == NULL) {
PMD_INIT_LOG(ERR, "Failed allocate mem for sq hw ring");
return -ENOMEM;
}
memset(rz->addr, 0, ring_size);
sq->phys = rz->iova;
sq->desc = rz->addr;
sq->qlen_mask = desc_cnt - 1;
return 0;
}
static int
nicvf_qset_rbdr_alloc(struct rte_eth_dev *dev, struct nicvf *nic,
uint32_t desc_cnt, uint32_t buffsz)
{
struct nicvf_rbdr *rbdr;
const struct rte_memzone *rz;
uint32_t ring_size;
assert(nic->rbdr == NULL);
rbdr = rte_zmalloc_socket("rbdr", sizeof(struct nicvf_rbdr),
RTE_CACHE_LINE_SIZE, nic->node);
if (rbdr == NULL) {
PMD_INIT_LOG(ERR, "Failed to allocate mem for rbdr");
return -ENOMEM;
}
ring_size = sizeof(struct rbdr_entry_t) * RBDR_QUEUE_SZ_MAX;
rz = rte_eth_dma_zone_reserve(dev, "rbdr",
nicvf_netdev_qidx(nic, 0), ring_size,
NICVF_RBDR_BASE_ALIGN_BYTES, nic->node);
if (rz == NULL) {
PMD_INIT_LOG(ERR, "Failed to allocate mem for rbdr desc ring");
return -ENOMEM;
}
memset(rz->addr, 0, ring_size);
rbdr->phys = rz->iova;
rbdr->tail = 0;
rbdr->next_tail = 0;
rbdr->desc = rz->addr;
rbdr->buffsz = buffsz;
rbdr->qlen_mask = desc_cnt - 1;
rbdr->rbdr_status =
nicvf_qset_base(nic, 0) + NIC_QSET_RBDR_0_1_STATUS0;
rbdr->rbdr_door =
nicvf_qset_base(nic, 0) + NIC_QSET_RBDR_0_1_DOOR;
nic->rbdr = rbdr;
return 0;
}
static void
nicvf_rbdr_release_mbuf(struct rte_eth_dev *dev, struct nicvf *nic,
nicvf_iova_addr_t phy)
{
uint16_t qidx;
void *obj;
struct nicvf_rxq *rxq;
uint16_t rx_start, rx_end;
/* Get queue ranges for this VF */
nicvf_rx_range(dev, nic, &rx_start, &rx_end);
for (qidx = rx_start; qidx <= rx_end; qidx++) {
rxq = dev->data->rx_queues[qidx];
if (rxq->precharge_cnt) {
obj = (void *)nicvf_mbuff_phy2virt(phy,
rxq->mbuf_phys_off);
rte_mempool_put(rxq->pool, obj);
rxq->precharge_cnt--;
break;
}
}
}
static inline void
nicvf_rbdr_release_mbufs(struct rte_eth_dev *dev, struct nicvf *nic)
{
uint32_t qlen_mask, head;
struct rbdr_entry_t *entry;
struct nicvf_rbdr *rbdr = nic->rbdr;
qlen_mask = rbdr->qlen_mask;
head = rbdr->head;
while (head != rbdr->tail) {
entry = rbdr->desc + head;
nicvf_rbdr_release_mbuf(dev, nic, entry->full_addr);
head++;
head = head & qlen_mask;
}
}
static inline void
nicvf_tx_queue_release_mbufs(struct nicvf_txq *txq)
{
uint32_t head;
head = txq->head;
while (head != txq->tail) {
if (txq->txbuffs[head]) {
rte_pktmbuf_free_seg(txq->txbuffs[head]);
txq->txbuffs[head] = NULL;
}
head++;
head = head & txq->qlen_mask;
}
}
static void
nicvf_tx_queue_reset(struct nicvf_txq *txq)
{
uint32_t txq_desc_cnt = txq->qlen_mask + 1;
memset(txq->desc, 0, sizeof(union sq_entry_t) * txq_desc_cnt);
memset(txq->txbuffs, 0, sizeof(struct rte_mbuf *) * txq_desc_cnt);
txq->tail = 0;
txq->head = 0;
txq->xmit_bufs = 0;
}
static inline int
nicvf_vf_start_tx_queue(struct rte_eth_dev *dev, struct nicvf *nic,
uint16_t qidx)
{
struct nicvf_txq *txq;
int ret;
assert(qidx < MAX_SND_QUEUES_PER_QS);
if (dev->data->tx_queue_state[nicvf_netdev_qidx(nic, qidx)] ==
RTE_ETH_QUEUE_STATE_STARTED)
return 0;
txq = dev->data->tx_queues[nicvf_netdev_qidx(nic, qidx)];
txq->pool = NULL;
ret = nicvf_qset_sq_config(nic, qidx, txq);
if (ret) {
PMD_INIT_LOG(ERR, "Failed to configure sq VF%d %d %d",
nic->vf_id, qidx, ret);
goto config_sq_error;
}
dev->data->tx_queue_state[nicvf_netdev_qidx(nic, qidx)] =
RTE_ETH_QUEUE_STATE_STARTED;
return ret;
config_sq_error:
nicvf_qset_sq_reclaim(nic, qidx);
return ret;
}
static inline int
nicvf_vf_stop_tx_queue(struct rte_eth_dev *dev, struct nicvf *nic,
uint16_t qidx)
{
struct nicvf_txq *txq;
int ret;
assert(qidx < MAX_SND_QUEUES_PER_QS);
if (dev->data->tx_queue_state[nicvf_netdev_qidx(nic, qidx)] ==
RTE_ETH_QUEUE_STATE_STOPPED)
return 0;
ret = nicvf_qset_sq_reclaim(nic, qidx);
if (ret)
PMD_INIT_LOG(ERR, "Failed to reclaim sq VF%d %d %d",
nic->vf_id, qidx, ret);
txq = dev->data->tx_queues[nicvf_netdev_qidx(nic, qidx)];
nicvf_tx_queue_release_mbufs(txq);
nicvf_tx_queue_reset(txq);
dev->data->tx_queue_state[nicvf_netdev_qidx(nic, qidx)] =
RTE_ETH_QUEUE_STATE_STOPPED;
return ret;
}
static inline int
nicvf_configure_cpi(struct rte_eth_dev *dev)
{
struct nicvf *nic = nicvf_pmd_priv(dev);
uint16_t qidx, qcnt;
int ret;
/* Count started rx queues */
for (qidx = qcnt = 0; qidx < dev->data->nb_rx_queues; qidx++)
if (dev->data->rx_queue_state[qidx] ==
RTE_ETH_QUEUE_STATE_STARTED)
qcnt++;
nic->cpi_alg = CPI_ALG_NONE;
ret = nicvf_mbox_config_cpi(nic, qcnt);
if (ret)
PMD_INIT_LOG(ERR, "Failed to configure CPI %d", ret);
return ret;
}
static inline int
nicvf_configure_rss(struct rte_eth_dev *dev)
{
struct nicvf *nic = nicvf_pmd_priv(dev);
uint64_t rsshf;
int ret = -EINVAL;
rsshf = nicvf_rss_ethdev_to_nic(nic,
dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf);
PMD_DRV_LOG(INFO, "mode=%d rx_queues=%d loopback=%d rsshf=0x%" PRIx64,
dev->data->dev_conf.rxmode.mq_mode,
dev->data->nb_rx_queues,
dev->data->dev_conf.lpbk_mode, rsshf);
if (dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_NONE)
ret = nicvf_rss_term(nic);
else if (dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_RSS)
ret = nicvf_rss_config(nic, dev->data->nb_rx_queues, rsshf);
if (ret)
PMD_INIT_LOG(ERR, "Failed to configure RSS %d", ret);
return ret;
}
static int
nicvf_configure_rss_reta(struct rte_eth_dev *dev)
{
struct nicvf *nic = nicvf_pmd_priv(dev);
unsigned int idx, qmap_size;
uint8_t qmap[RTE_MAX_QUEUES_PER_PORT];
uint8_t default_reta[NIC_MAX_RSS_IDR_TBL_SIZE];
if (nic->cpi_alg != CPI_ALG_NONE)
return -EINVAL;
/* Prepare queue map */
for (idx = 0, qmap_size = 0; idx < dev->data->nb_rx_queues; idx++) {
if (dev->data->rx_queue_state[idx] ==
RTE_ETH_QUEUE_STATE_STARTED)
qmap[qmap_size++] = idx;
}
/* Update default RSS RETA */
for (idx = 0; idx < NIC_MAX_RSS_IDR_TBL_SIZE; idx++)
default_reta[idx] = qmap[idx % qmap_size];
return nicvf_rss_reta_update(nic, default_reta,
NIC_MAX_RSS_IDR_TBL_SIZE);
}
static void
nicvf_dev_tx_queue_release(void *sq)
{
struct nicvf_txq *txq;
PMD_INIT_FUNC_TRACE();
txq = (struct nicvf_txq *)sq;
if (txq) {
if (txq->txbuffs != NULL) {
nicvf_tx_queue_release_mbufs(txq);
rte_free(txq->txbuffs);
txq->txbuffs = NULL;
}
rte_free(txq);
}
}
static void
nicvf_set_tx_function(struct rte_eth_dev *dev)
{
struct nicvf_txq *txq;
size_t i;
bool multiseg = false;
for (i = 0; i < dev->data->nb_tx_queues; i++) {
txq = dev->data->tx_queues[i];
if (txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS) {
multiseg = true;
break;
}
}
/* Use a simple Tx queue (no offloads, no multi segs) if possible */
if (multiseg) {
PMD_DRV_LOG(DEBUG, "Using multi-segment tx callback");
dev->tx_pkt_burst = nicvf_xmit_pkts_multiseg;
} else {
PMD_DRV_LOG(DEBUG, "Using single-segment tx callback");
dev->tx_pkt_burst = nicvf_xmit_pkts;
}
if (txq->pool_free == nicvf_single_pool_free_xmited_buffers)
PMD_DRV_LOG(DEBUG, "Using single-mempool tx free method");
else
PMD_DRV_LOG(DEBUG, "Using multi-mempool tx free method");
}
static void
nicvf_set_rx_function(struct rte_eth_dev *dev)
{
if (dev->data->scattered_rx) {
PMD_DRV_LOG(DEBUG, "Using multi-segment rx callback");
dev->rx_pkt_burst = nicvf_recv_pkts_multiseg;
} else {
PMD_DRV_LOG(DEBUG, "Using single-segment rx callback");
dev->rx_pkt_burst = nicvf_recv_pkts;
}
}
static int
nicvf_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t qidx,
uint16_t nb_desc, unsigned int socket_id,
const struct rte_eth_txconf *tx_conf)
{
uint16_t tx_free_thresh;
bool is_single_pool;
struct nicvf_txq *txq;
struct nicvf *nic = nicvf_pmd_priv(dev);
ethdev: new Rx/Tx offloads API This patch check if a input requested offloading is valid or not. Any reuqested offloading must be supported in the device capabilities. Any offloading is disabled by default if it is not set in the parameter dev_conf->[rt]xmode.offloads to rte_eth_dev_configure() and [rt]x_conf->offloads to rte_eth_[rt]x_queue_setup(). If any offloading is enabled in rte_eth_dev_configure() by application, it is enabled on all queues no matter whether it is per-queue or per-port type and no matter whether it is set or cleared in [rt]x_conf->offloads to rte_eth_[rt]x_queue_setup(). If a per-queue offloading hasn't be enabled in rte_eth_dev_configure(), it can be enabled or disabled for individual queue in ret_eth_[rt]x_queue_setup(). A new added offloading is the one which hasn't been enabled in rte_eth_dev_configure() and is reuqested to be enabled in rte_eth_[rt]x_queue_setup(), it must be per-queue type, otherwise trigger an error log. The underlying PMD must be aware that the requested offloadings to PMD specific queue_setup() function only carries those new added offloadings of per-queue type. This patch can make above such checking in a common way in rte_ethdev layer to avoid same checking in underlying PMD. This patch assumes that all PMDs in 18.05-rc2 have already converted to offload API defined in 17.11 . It also assumes that all PMDs can return correct offloading capabilities in rte_eth_dev_infos_get(). In the beginning of [rt]x_queue_setup() of underlying PMD, add offloads = [rt]xconf->offloads | dev->data->dev_conf.[rt]xmode.offloads; to keep same as offload API defined in 17.11 to avoid upper application broken due to offload API change. PMD can use the info that input [rt]xconf->offloads only carry the new added per-queue offloads to do some optimization or some code change on base of this patch. Signed-off-by: Wei Dai <wei.dai@intel.com> Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com> Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
2018-05-10 11:56:55 +00:00
uint64_t offloads;
PMD_INIT_FUNC_TRACE();
if (qidx >= MAX_SND_QUEUES_PER_QS)
nic = nic->snicvf[qidx / MAX_SND_QUEUES_PER_QS - 1];
qidx = qidx % MAX_SND_QUEUES_PER_QS;
/* Socket id check */
if (socket_id != (unsigned int)SOCKET_ID_ANY && socket_id != nic->node)
PMD_DRV_LOG(WARNING, "socket_id expected %d, configured %d",
socket_id, nic->node);
/* Tx deferred start is not supported */
if (tx_conf->tx_deferred_start) {
PMD_INIT_LOG(ERR, "Tx deferred start not supported");
return -EINVAL;
}
/* Roundup nb_desc to available qsize and validate max number of desc */
nb_desc = nicvf_qsize_sq_roundup(nb_desc);
if (nb_desc == 0) {
PMD_INIT_LOG(ERR, "Value of nb_desc beyond available sq qsize");
return -EINVAL;
}
/* Validate tx_free_thresh */
tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
tx_conf->tx_free_thresh :
NICVF_DEFAULT_TX_FREE_THRESH);
if (tx_free_thresh > (nb_desc) ||
tx_free_thresh > NICVF_MAX_TX_FREE_THRESH) {
PMD_INIT_LOG(ERR,
"tx_free_thresh must be less than the number of TX "
"descriptors. (tx_free_thresh=%u port=%d "
"queue=%d)", (unsigned int)tx_free_thresh,
(int)dev->data->port_id, (int)qidx);
return -EINVAL;
}
/* Free memory prior to re-allocation if needed. */
if (dev->data->tx_queues[nicvf_netdev_qidx(nic, qidx)] != NULL) {
PMD_TX_LOG(DEBUG, "Freeing memory prior to re-allocation %d",
nicvf_netdev_qidx(nic, qidx));
nicvf_dev_tx_queue_release(
dev->data->tx_queues[nicvf_netdev_qidx(nic, qidx)]);
dev->data->tx_queues[nicvf_netdev_qidx(nic, qidx)] = NULL;
}
/* Allocating tx queue data structure */
txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct nicvf_txq),
RTE_CACHE_LINE_SIZE, nic->node);
if (txq == NULL) {
PMD_INIT_LOG(ERR, "Failed to allocate txq=%d",
nicvf_netdev_qidx(nic, qidx));
return -ENOMEM;
}
txq->nic = nic;
txq->queue_id = qidx;
txq->tx_free_thresh = tx_free_thresh;
txq->sq_head = nicvf_qset_base(nic, qidx) + NIC_QSET_SQ_0_7_HEAD;
txq->sq_door = nicvf_qset_base(nic, qidx) + NIC_QSET_SQ_0_7_DOOR;
ethdev: new Rx/Tx offloads API This patch check if a input requested offloading is valid or not. Any reuqested offloading must be supported in the device capabilities. Any offloading is disabled by default if it is not set in the parameter dev_conf->[rt]xmode.offloads to rte_eth_dev_configure() and [rt]x_conf->offloads to rte_eth_[rt]x_queue_setup(). If any offloading is enabled in rte_eth_dev_configure() by application, it is enabled on all queues no matter whether it is per-queue or per-port type and no matter whether it is set or cleared in [rt]x_conf->offloads to rte_eth_[rt]x_queue_setup(). If a per-queue offloading hasn't be enabled in rte_eth_dev_configure(), it can be enabled or disabled for individual queue in ret_eth_[rt]x_queue_setup(). A new added offloading is the one which hasn't been enabled in rte_eth_dev_configure() and is reuqested to be enabled in rte_eth_[rt]x_queue_setup(), it must be per-queue type, otherwise trigger an error log. The underlying PMD must be aware that the requested offloadings to PMD specific queue_setup() function only carries those new added offloadings of per-queue type. This patch can make above such checking in a common way in rte_ethdev layer to avoid same checking in underlying PMD. This patch assumes that all PMDs in 18.05-rc2 have already converted to offload API defined in 17.11 . It also assumes that all PMDs can return correct offloading capabilities in rte_eth_dev_infos_get(). In the beginning of [rt]x_queue_setup() of underlying PMD, add offloads = [rt]xconf->offloads | dev->data->dev_conf.[rt]xmode.offloads; to keep same as offload API defined in 17.11 to avoid upper application broken due to offload API change. PMD can use the info that input [rt]xconf->offloads only carry the new added per-queue offloads to do some optimization or some code change on base of this patch. Signed-off-by: Wei Dai <wei.dai@intel.com> Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com> Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
2018-05-10 11:56:55 +00:00
offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
txq->offloads = offloads;
ethdev: new Rx/Tx offloads API This patch check if a input requested offloading is valid or not. Any reuqested offloading must be supported in the device capabilities. Any offloading is disabled by default if it is not set in the parameter dev_conf->[rt]xmode.offloads to rte_eth_dev_configure() and [rt]x_conf->offloads to rte_eth_[rt]x_queue_setup(). If any offloading is enabled in rte_eth_dev_configure() by application, it is enabled on all queues no matter whether it is per-queue or per-port type and no matter whether it is set or cleared in [rt]x_conf->offloads to rte_eth_[rt]x_queue_setup(). If a per-queue offloading hasn't be enabled in rte_eth_dev_configure(), it can be enabled or disabled for individual queue in ret_eth_[rt]x_queue_setup(). A new added offloading is the one which hasn't been enabled in rte_eth_dev_configure() and is reuqested to be enabled in rte_eth_[rt]x_queue_setup(), it must be per-queue type, otherwise trigger an error log. The underlying PMD must be aware that the requested offloadings to PMD specific queue_setup() function only carries those new added offloadings of per-queue type. This patch can make above such checking in a common way in rte_ethdev layer to avoid same checking in underlying PMD. This patch assumes that all PMDs in 18.05-rc2 have already converted to offload API defined in 17.11 . It also assumes that all PMDs can return correct offloading capabilities in rte_eth_dev_infos_get(). In the beginning of [rt]x_queue_setup() of underlying PMD, add offloads = [rt]xconf->offloads | dev->data->dev_conf.[rt]xmode.offloads; to keep same as offload API defined in 17.11 to avoid upper application broken due to offload API change. PMD can use the info that input [rt]xconf->offloads only carry the new added per-queue offloads to do some optimization or some code change on base of this patch. Signed-off-by: Wei Dai <wei.dai@intel.com> Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com> Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
2018-05-10 11:56:55 +00:00
is_single_pool = !!(offloads & DEV_TX_OFFLOAD_MBUF_FAST_FREE);
/* Choose optimum free threshold value for multipool case */
if (!is_single_pool) {
txq->tx_free_thresh = (uint16_t)
(tx_conf->tx_free_thresh == NICVF_DEFAULT_TX_FREE_THRESH ?
NICVF_TX_FREE_MPOOL_THRESH :
tx_conf->tx_free_thresh);
txq->pool_free = nicvf_multi_pool_free_xmited_buffers;
} else {
txq->pool_free = nicvf_single_pool_free_xmited_buffers;
}
/* Allocate software ring */
txq->txbuffs = rte_zmalloc_socket("txq->txbuffs",
nb_desc * sizeof(struct rte_mbuf *),
RTE_CACHE_LINE_SIZE, nic->node);
if (txq->txbuffs == NULL) {
nicvf_dev_tx_queue_release(txq);
return -ENOMEM;
}
if (nicvf_qset_sq_alloc(dev, nic, txq, qidx, nb_desc)) {
PMD_INIT_LOG(ERR, "Failed to allocate mem for sq %d", qidx);
nicvf_dev_tx_queue_release(txq);
return -ENOMEM;
}
nicvf_tx_queue_reset(txq);
PMD_INIT_LOG(DEBUG, "[%d] txq=%p nb_desc=%d desc=%p"
" phys=0x%" PRIx64 " offloads=0x%" PRIx64,
nicvf_netdev_qidx(nic, qidx), txq, nb_desc, txq->desc,
txq->phys, txq->offloads);
dev->data->tx_queues[nicvf_netdev_qidx(nic, qidx)] = txq;
dev->data->tx_queue_state[nicvf_netdev_qidx(nic, qidx)] =
RTE_ETH_QUEUE_STATE_STOPPED;
return 0;
}
static inline void
nicvf_rx_queue_release_mbufs(struct rte_eth_dev *dev, struct nicvf_rxq *rxq)
{
uint32_t rxq_cnt;
uint32_t nb_pkts, released_pkts = 0;
uint32_t refill_cnt = 0;
struct rte_mbuf *rx_pkts[NICVF_MAX_RX_FREE_THRESH];
if (dev->rx_pkt_burst == NULL)
return;
while ((rxq_cnt = nicvf_dev_rx_queue_count(dev,
nicvf_netdev_qidx(rxq->nic, rxq->queue_id)))) {
nb_pkts = dev->rx_pkt_burst(rxq, rx_pkts,
NICVF_MAX_RX_FREE_THRESH);
PMD_DRV_LOG(INFO, "nb_pkts=%d rxq_cnt=%d", nb_pkts, rxq_cnt);
while (nb_pkts) {
rte_pktmbuf_free_seg(rx_pkts[--nb_pkts]);
released_pkts++;
}
}
refill_cnt += nicvf_dev_rbdr_refill(dev,
nicvf_netdev_qidx(rxq->nic, rxq->queue_id));
PMD_DRV_LOG(INFO, "free_cnt=%d refill_cnt=%d",
released_pkts, refill_cnt);
}
static void
nicvf_rx_queue_reset(struct nicvf_rxq *rxq)
{
rxq->head = 0;
rxq->available_space = 0;
rxq->recv_buffers = 0;
}
static inline int
nicvf_vf_start_rx_queue(struct rte_eth_dev *dev, struct nicvf *nic,
uint16_t qidx)
{
struct nicvf_rxq *rxq;
int ret;
assert(qidx < MAX_RCV_QUEUES_PER_QS);
if (dev->data->rx_queue_state[nicvf_netdev_qidx(nic, qidx)] ==
RTE_ETH_QUEUE_STATE_STARTED)
return 0;
/* Update rbdr pointer to all rxq */
rxq = dev->data->rx_queues[nicvf_netdev_qidx(nic, qidx)];
rxq->shared_rbdr = nic->rbdr;
ret = nicvf_qset_rq_config(nic, qidx, rxq);
if (ret) {
PMD_INIT_LOG(ERR, "Failed to configure rq VF%d %d %d",
nic->vf_id, qidx, ret);
goto config_rq_error;
}
ret = nicvf_qset_cq_config(nic, qidx, rxq);
if (ret) {
PMD_INIT_LOG(ERR, "Failed to configure cq VF%d %d %d",
nic->vf_id, qidx, ret);
goto config_cq_error;
}
dev->data->rx_queue_state[nicvf_netdev_qidx(nic, qidx)] =
RTE_ETH_QUEUE_STATE_STARTED;
return 0;
config_cq_error:
nicvf_qset_cq_reclaim(nic, qidx);
config_rq_error:
nicvf_qset_rq_reclaim(nic, qidx);
return ret;
}
static inline int
nicvf_vf_stop_rx_queue(struct rte_eth_dev *dev, struct nicvf *nic,
uint16_t qidx)
{
struct nicvf_rxq *rxq;
int ret, other_error;
if (dev->data->rx_queue_state[nicvf_netdev_qidx(nic, qidx)] ==
RTE_ETH_QUEUE_STATE_STOPPED)
return 0;
ret = nicvf_qset_rq_reclaim(nic, qidx);
if (ret)
PMD_INIT_LOG(ERR, "Failed to reclaim rq VF%d %d %d",
nic->vf_id, qidx, ret);
other_error = ret;
rxq = dev->data->rx_queues[nicvf_netdev_qidx(nic, qidx)];
nicvf_rx_queue_release_mbufs(dev, rxq);
nicvf_rx_queue_reset(rxq);
ret = nicvf_qset_cq_reclaim(nic, qidx);
if (ret)
PMD_INIT_LOG(ERR, "Failed to reclaim cq VF%d %d %d",
nic->vf_id, qidx, ret);
other_error |= ret;
dev->data->rx_queue_state[nicvf_netdev_qidx(nic, qidx)] =
RTE_ETH_QUEUE_STATE_STOPPED;
return other_error;
}
static void
nicvf_dev_rx_queue_release(void *rx_queue)
{
PMD_INIT_FUNC_TRACE();
rte_free(rx_queue);
}
static int
nicvf_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t qidx)
{
struct nicvf *nic = nicvf_pmd_priv(dev);
int ret;
if (qidx >= MAX_RCV_QUEUES_PER_QS)
nic = nic->snicvf[(qidx / MAX_RCV_QUEUES_PER_QS - 1)];
qidx = qidx % MAX_RCV_QUEUES_PER_QS;
ret = nicvf_vf_start_rx_queue(dev, nic, qidx);
if (ret)
return ret;
ret = nicvf_configure_cpi(dev);
if (ret)
return ret;
return nicvf_configure_rss_reta(dev);
}
static int
nicvf_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t qidx)
{
int ret;
struct nicvf *nic = nicvf_pmd_priv(dev);
if (qidx >= MAX_SND_QUEUES_PER_QS)
nic = nic->snicvf[(qidx / MAX_SND_QUEUES_PER_QS - 1)];
qidx = qidx % MAX_RCV_QUEUES_PER_QS;
ret = nicvf_vf_stop_rx_queue(dev, nic, qidx);
ret |= nicvf_configure_cpi(dev);
ret |= nicvf_configure_rss_reta(dev);
return ret;
}
static int
nicvf_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t qidx)
{
struct nicvf *nic = nicvf_pmd_priv(dev);
if (qidx >= MAX_SND_QUEUES_PER_QS)
nic = nic->snicvf[(qidx / MAX_SND_QUEUES_PER_QS - 1)];
qidx = qidx % MAX_SND_QUEUES_PER_QS;
return nicvf_vf_start_tx_queue(dev, nic, qidx);
}
static int
nicvf_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t qidx)
{
struct nicvf *nic = nicvf_pmd_priv(dev);
if (qidx >= MAX_SND_QUEUES_PER_QS)
nic = nic->snicvf[(qidx / MAX_SND_QUEUES_PER_QS - 1)];
qidx = qidx % MAX_SND_QUEUES_PER_QS;
return nicvf_vf_stop_tx_queue(dev, nic, qidx);
}
static inline void
nicvf_rxq_mbuf_setup(struct nicvf_rxq *rxq)
{
uintptr_t p;
struct rte_mbuf mb_def;
struct nicvf *nic = rxq->nic;
RTE_BUILD_BUG_ON(sizeof(union mbuf_initializer) != 8);
RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) % 8 != 0);
RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, refcnt) -
offsetof(struct rte_mbuf, data_off) != 2);
RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, nb_segs) -
offsetof(struct rte_mbuf, data_off) != 4);
RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, port) -
offsetof(struct rte_mbuf, data_off) != 6);
mb_def.nb_segs = 1;
mb_def.data_off = RTE_PKTMBUF_HEADROOM + (nic->skip_bytes);
mb_def.port = rxq->port_id;
rte_mbuf_refcnt_set(&mb_def, 1);
/* Prevent compiler reordering: rearm_data covers previous fields */
rte_compiler_barrier();
p = (uintptr_t)&mb_def.rearm_data;
rxq->mbuf_initializer.value = *(uint64_t *)p;
}
static int
nicvf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t qidx,
uint16_t nb_desc, unsigned int socket_id,
const struct rte_eth_rxconf *rx_conf,
struct rte_mempool *mp)
{
uint16_t rx_free_thresh;
struct nicvf_rxq *rxq;
struct nicvf *nic = nicvf_pmd_priv(dev);
ethdev: new Rx/Tx offloads API This patch check if a input requested offloading is valid or not. Any reuqested offloading must be supported in the device capabilities. Any offloading is disabled by default if it is not set in the parameter dev_conf->[rt]xmode.offloads to rte_eth_dev_configure() and [rt]x_conf->offloads to rte_eth_[rt]x_queue_setup(). If any offloading is enabled in rte_eth_dev_configure() by application, it is enabled on all queues no matter whether it is per-queue or per-port type and no matter whether it is set or cleared in [rt]x_conf->offloads to rte_eth_[rt]x_queue_setup(). If a per-queue offloading hasn't be enabled in rte_eth_dev_configure(), it can be enabled or disabled for individual queue in ret_eth_[rt]x_queue_setup(). A new added offloading is the one which hasn't been enabled in rte_eth_dev_configure() and is reuqested to be enabled in rte_eth_[rt]x_queue_setup(), it must be per-queue type, otherwise trigger an error log. The underlying PMD must be aware that the requested offloadings to PMD specific queue_setup() function only carries those new added offloadings of per-queue type. This patch can make above such checking in a common way in rte_ethdev layer to avoid same checking in underlying PMD. This patch assumes that all PMDs in 18.05-rc2 have already converted to offload API defined in 17.11 . It also assumes that all PMDs can return correct offloading capabilities in rte_eth_dev_infos_get(). In the beginning of [rt]x_queue_setup() of underlying PMD, add offloads = [rt]xconf->offloads | dev->data->dev_conf.[rt]xmode.offloads; to keep same as offload API defined in 17.11 to avoid upper application broken due to offload API change. PMD can use the info that input [rt]xconf->offloads only carry the new added per-queue offloads to do some optimization or some code change on base of this patch. Signed-off-by: Wei Dai <wei.dai@intel.com> Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com> Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
2018-05-10 11:56:55 +00:00
uint64_t offloads;
uint32_t buffsz;
struct rte_pktmbuf_pool_private *mbp_priv;
PMD_INIT_FUNC_TRACE();
/* First skip check */
mbp_priv = rte_mempool_get_priv(mp);
buffsz = mbp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM;
if (buffsz < (uint32_t)(nic->skip_bytes)) {
PMD_INIT_LOG(ERR, "First skip is more than configured buffer size");
return -EINVAL;
}
if (qidx >= MAX_RCV_QUEUES_PER_QS)
nic = nic->snicvf[qidx / MAX_RCV_QUEUES_PER_QS - 1];
qidx = qidx % MAX_RCV_QUEUES_PER_QS;
/* Socket id check */
if (socket_id != (unsigned int)SOCKET_ID_ANY && socket_id != nic->node)
PMD_DRV_LOG(WARNING, "socket_id expected %d, configured %d",
socket_id, nic->node);
/* Mempool memory must be contiguous, so must be one memory segment*/
if (mp->nb_mem_chunks != 1) {
PMD_INIT_LOG(ERR, "Non-contiguous mempool, add more huge pages");
return -EINVAL;
}
/* Mempool memory must be physically contiguous */
if (mp->flags & MEMPOOL_F_NO_IOVA_CONTIG) {
PMD_INIT_LOG(ERR, "Mempool memory must be physically contiguous");
return -EINVAL;
}
/* Rx deferred start is not supported */
if (rx_conf->rx_deferred_start) {
PMD_INIT_LOG(ERR, "Rx deferred start not supported");
return -EINVAL;
}
/* Roundup nb_desc to available qsize and validate max number of desc */
nb_desc = nicvf_qsize_cq_roundup(nb_desc);
if (nb_desc == 0) {
PMD_INIT_LOG(ERR, "Value nb_desc beyond available hw cq qsize");
return -EINVAL;
}
/* Check rx_free_thresh upper bound */
rx_free_thresh = (uint16_t)((rx_conf->rx_free_thresh) ?
rx_conf->rx_free_thresh :
NICVF_DEFAULT_RX_FREE_THRESH);
if (rx_free_thresh > NICVF_MAX_RX_FREE_THRESH ||
rx_free_thresh >= nb_desc * .75) {
PMD_INIT_LOG(ERR, "rx_free_thresh greater than expected %d",
rx_free_thresh);
return -EINVAL;
}
/* Free memory prior to re-allocation if needed */
if (dev->data->rx_queues[nicvf_netdev_qidx(nic, qidx)] != NULL) {
PMD_RX_LOG(DEBUG, "Freeing memory prior to re-allocation %d",
nicvf_netdev_qidx(nic, qidx));
nicvf_dev_rx_queue_release(
dev->data->rx_queues[nicvf_netdev_qidx(nic, qidx)]);
dev->data->rx_queues[nicvf_netdev_qidx(nic, qidx)] = NULL;
}
/* Allocate rxq memory */
rxq = rte_zmalloc_socket("ethdev rx queue", sizeof(struct nicvf_rxq),
RTE_CACHE_LINE_SIZE, nic->node);
if (rxq == NULL) {
PMD_INIT_LOG(ERR, "Failed to allocate rxq=%d",
nicvf_netdev_qidx(nic, qidx));
return -ENOMEM;
}
rxq->nic = nic;
rxq->pool = mp;
rxq->queue_id = qidx;
rxq->port_id = dev->data->port_id;
rxq->rx_free_thresh = rx_free_thresh;
rxq->rx_drop_en = rx_conf->rx_drop_en;
rxq->cq_status = nicvf_qset_base(nic, qidx) + NIC_QSET_CQ_0_7_STATUS;
rxq->cq_door = nicvf_qset_base(nic, qidx) + NIC_QSET_CQ_0_7_DOOR;
rxq->precharge_cnt = 0;
if (nicvf_hw_cap(nic) & NICVF_CAP_CQE_RX2)
rxq->rbptr_offset = NICVF_CQE_RX2_RBPTR_WORD;
else
rxq->rbptr_offset = NICVF_CQE_RBPTR_WORD;
nicvf_rxq_mbuf_setup(rxq);
/* Alloc completion queue */
if (nicvf_qset_cq_alloc(dev, nic, rxq, rxq->queue_id, nb_desc)) {
PMD_INIT_LOG(ERR, "failed to allocate cq %u", rxq->queue_id);
nicvf_dev_rx_queue_release(rxq);
return -ENOMEM;
}
nicvf_rx_queue_reset(rxq);
ethdev: new Rx/Tx offloads API This patch check if a input requested offloading is valid or not. Any reuqested offloading must be supported in the device capabilities. Any offloading is disabled by default if it is not set in the parameter dev_conf->[rt]xmode.offloads to rte_eth_dev_configure() and [rt]x_conf->offloads to rte_eth_[rt]x_queue_setup(). If any offloading is enabled in rte_eth_dev_configure() by application, it is enabled on all queues no matter whether it is per-queue or per-port type and no matter whether it is set or cleared in [rt]x_conf->offloads to rte_eth_[rt]x_queue_setup(). If a per-queue offloading hasn't be enabled in rte_eth_dev_configure(), it can be enabled or disabled for individual queue in ret_eth_[rt]x_queue_setup(). A new added offloading is the one which hasn't been enabled in rte_eth_dev_configure() and is reuqested to be enabled in rte_eth_[rt]x_queue_setup(), it must be per-queue type, otherwise trigger an error log. The underlying PMD must be aware that the requested offloadings to PMD specific queue_setup() function only carries those new added offloadings of per-queue type. This patch can make above such checking in a common way in rte_ethdev layer to avoid same checking in underlying PMD. This patch assumes that all PMDs in 18.05-rc2 have already converted to offload API defined in 17.11 . It also assumes that all PMDs can return correct offloading capabilities in rte_eth_dev_infos_get(). In the beginning of [rt]x_queue_setup() of underlying PMD, add offloads = [rt]xconf->offloads | dev->data->dev_conf.[rt]xmode.offloads; to keep same as offload API defined in 17.11 to avoid upper application broken due to offload API change. PMD can use the info that input [rt]xconf->offloads only carry the new added per-queue offloads to do some optimization or some code change on base of this patch. Signed-off-by: Wei Dai <wei.dai@intel.com> Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com> Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
2018-05-10 11:56:55 +00:00
offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
PMD_INIT_LOG(DEBUG, "[%d] rxq=%p pool=%s nb_desc=(%d/%d)"
" phy=0x%" PRIx64 " offloads=0x%" PRIx64,
nicvf_netdev_qidx(nic, qidx), rxq, mp->name, nb_desc,
ethdev: new Rx/Tx offloads API This patch check if a input requested offloading is valid or not. Any reuqested offloading must be supported in the device capabilities. Any offloading is disabled by default if it is not set in the parameter dev_conf->[rt]xmode.offloads to rte_eth_dev_configure() and [rt]x_conf->offloads to rte_eth_[rt]x_queue_setup(). If any offloading is enabled in rte_eth_dev_configure() by application, it is enabled on all queues no matter whether it is per-queue or per-port type and no matter whether it is set or cleared in [rt]x_conf->offloads to rte_eth_[rt]x_queue_setup(). If a per-queue offloading hasn't be enabled in rte_eth_dev_configure(), it can be enabled or disabled for individual queue in ret_eth_[rt]x_queue_setup(). A new added offloading is the one which hasn't been enabled in rte_eth_dev_configure() and is reuqested to be enabled in rte_eth_[rt]x_queue_setup(), it must be per-queue type, otherwise trigger an error log. The underlying PMD must be aware that the requested offloadings to PMD specific queue_setup() function only carries those new added offloadings of per-queue type. This patch can make above such checking in a common way in rte_ethdev layer to avoid same checking in underlying PMD. This patch assumes that all PMDs in 18.05-rc2 have already converted to offload API defined in 17.11 . It also assumes that all PMDs can return correct offloading capabilities in rte_eth_dev_infos_get(). In the beginning of [rt]x_queue_setup() of underlying PMD, add offloads = [rt]xconf->offloads | dev->data->dev_conf.[rt]xmode.offloads; to keep same as offload API defined in 17.11 to avoid upper application broken due to offload API change. PMD can use the info that input [rt]xconf->offloads only carry the new added per-queue offloads to do some optimization or some code change on base of this patch. Signed-off-by: Wei Dai <wei.dai@intel.com> Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com> Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
2018-05-10 11:56:55 +00:00
rte_mempool_avail_count(mp), rxq->phys, offloads);
dev->data->rx_queues[nicvf_netdev_qidx(nic, qidx)] = rxq;
dev->data->rx_queue_state[nicvf_netdev_qidx(nic, qidx)] =
RTE_ETH_QUEUE_STATE_STOPPED;
return 0;
}
static void
nicvf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
struct nicvf *nic = nicvf_pmd_priv(dev);
struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
PMD_INIT_FUNC_TRACE();
/* Autonegotiation may be disabled */
dev_info->speed_capa = ETH_LINK_SPEED_FIXED;
dev_info->speed_capa |= ETH_LINK_SPEED_10M | ETH_LINK_SPEED_100M |
ETH_LINK_SPEED_1G | ETH_LINK_SPEED_10G;
if (nicvf_hw_version(nic) != PCI_SUB_DEVICE_ID_CN81XX_NICVF)
dev_info->speed_capa |= ETH_LINK_SPEED_40G;
dev_info->min_rx_bufsize = ETHER_MIN_MTU;
dev_info->max_rx_pktlen = NIC_HW_MAX_MTU + ETHER_HDR_LEN;
dev_info->max_rx_queues =
(uint16_t)MAX_RCV_QUEUES_PER_QS * (MAX_SQS_PER_VF + 1);
dev_info->max_tx_queues =
(uint16_t)MAX_SND_QUEUES_PER_QS * (MAX_SQS_PER_VF + 1);
dev_info->max_mac_addrs = 1;
dev_info->max_vfs = pci_dev->max_vfs;
dev_info->rx_offload_capa = NICVF_RX_OFFLOAD_CAPA;
dev_info->tx_offload_capa = NICVF_TX_OFFLOAD_CAPA;
dev_info->rx_queue_offload_capa = NICVF_RX_OFFLOAD_CAPA;
dev_info->tx_queue_offload_capa = NICVF_TX_OFFLOAD_CAPA;
dev_info->reta_size = nic->rss_info.rss_size;
dev_info->hash_key_size = RSS_HASH_KEY_BYTE_SIZE;
dev_info->flow_type_rss_offloads = NICVF_RSS_OFFLOAD_PASS1;
if (nicvf_hw_cap(nic) & NICVF_CAP_TUNNEL_PARSING)
dev_info->flow_type_rss_offloads |= NICVF_RSS_OFFLOAD_TUNNEL;
dev_info->default_rxconf = (struct rte_eth_rxconf) {
.rx_free_thresh = NICVF_DEFAULT_RX_FREE_THRESH,
.rx_drop_en = 0,
.offloads = DEV_RX_OFFLOAD_CRC_STRIP,
};
dev_info->default_txconf = (struct rte_eth_txconf) {
.tx_free_thresh = NICVF_DEFAULT_TX_FREE_THRESH,
.offloads = DEV_TX_OFFLOAD_MBUF_FAST_FREE |
DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
DEV_TX_OFFLOAD_UDP_CKSUM |
DEV_TX_OFFLOAD_TCP_CKSUM,
};
}
static nicvf_iova_addr_t
rbdr_rte_mempool_get(void *dev, void *opaque)
{
uint16_t qidx;
uintptr_t mbuf;
struct nicvf_rxq *rxq;
struct rte_eth_dev *eth_dev = (struct rte_eth_dev *)dev;
struct nicvf *nic = (struct nicvf *)opaque;
uint16_t rx_start, rx_end;
/* Get queue ranges for this VF */
nicvf_rx_range(eth_dev, nic, &rx_start, &rx_end);
for (qidx = rx_start; qidx <= rx_end; qidx++) {
rxq = eth_dev->data->rx_queues[qidx];
/* Maintain equal buffer count across all pools */
if (rxq->precharge_cnt >= rxq->qlen_mask)
continue;
rxq->precharge_cnt++;
mbuf = (uintptr_t)rte_pktmbuf_alloc(rxq->pool);
if (mbuf)
return nicvf_mbuff_virt2phy(mbuf, rxq->mbuf_phys_off);
}
return 0;
}
static int
nicvf_vf_start(struct rte_eth_dev *dev, struct nicvf *nic, uint32_t rbdrsz)
{
int ret;
uint16_t qidx, data_off;
uint32_t total_rxq_desc, nb_rbdr_desc, exp_buffs;
uint64_t mbuf_phys_off = 0;
struct nicvf_rxq *rxq;
struct rte_mbuf *mbuf;
uint16_t rx_start, rx_end;
uint16_t tx_start, tx_end;
bool vlan_strip;
PMD_INIT_FUNC_TRACE();
/* Userspace process exited without proper shutdown in last run */
if (nicvf_qset_rbdr_active(nic, 0))
nicvf_vf_stop(dev, nic, false);
/* Get queue ranges for this VF */
nicvf_rx_range(dev, nic, &rx_start, &rx_end);
/*
* Thunderx nicvf PMD can support more than one pool per port only when
* 1) Data payload size is same across all the pools in given port
* AND
* 2) All mbuffs in the pools are from the same hugepage
* AND
* 3) Mbuff metadata size is same across all the pools in given port
*
* This is to support existing application that uses multiple pool/port.
* But, the purpose of using multipool for QoS will not be addressed.
*
*/
/* Validate mempool attributes */
for (qidx = rx_start; qidx <= rx_end; qidx++) {
rxq = dev->data->rx_queues[qidx];
rxq->mbuf_phys_off = nicvf_mempool_phy_offset(rxq->pool);
mbuf = rte_pktmbuf_alloc(rxq->pool);
if (mbuf == NULL) {
PMD_INIT_LOG(ERR, "Failed allocate mbuf VF%d qid=%d "
"pool=%s",
nic->vf_id, qidx, rxq->pool->name);
return -ENOMEM;
}
data_off = nicvf_mbuff_meta_length(mbuf);
data_off += RTE_PKTMBUF_HEADROOM;
rte_pktmbuf_free(mbuf);
if (data_off % RTE_CACHE_LINE_SIZE) {
PMD_INIT_LOG(ERR, "%s: unaligned data_off=%d delta=%d",
rxq->pool->name, data_off,
data_off % RTE_CACHE_LINE_SIZE);
return -EINVAL;
}
rxq->mbuf_phys_off -= data_off;
rxq->mbuf_phys_off -= nic->skip_bytes;
if (mbuf_phys_off == 0)
mbuf_phys_off = rxq->mbuf_phys_off;
if (mbuf_phys_off != rxq->mbuf_phys_off) {
PMD_INIT_LOG(ERR, "pool params not same,%s VF%d %"
PRIx64, rxq->pool->name, nic->vf_id,
mbuf_phys_off);
return -EINVAL;
}
}
/* Check the level of buffers in the pool */
total_rxq_desc = 0;
for (qidx = rx_start; qidx <= rx_end; qidx++) {
rxq = dev->data->rx_queues[qidx];
/* Count total numbers of rxq descs */
total_rxq_desc += rxq->qlen_mask + 1;
exp_buffs = RTE_MEMPOOL_CACHE_MAX_SIZE + rxq->rx_free_thresh;
exp_buffs *= dev->data->nb_rx_queues;
if (rte_mempool_avail_count(rxq->pool) < exp_buffs) {
PMD_INIT_LOG(ERR, "Buff shortage in pool=%s (%d/%d)",
rxq->pool->name,
rte_mempool_avail_count(rxq->pool),
exp_buffs);
return -ENOENT;
}
}
/* Check RBDR desc overflow */
ret = nicvf_qsize_rbdr_roundup(total_rxq_desc);
if (ret == 0) {
PMD_INIT_LOG(ERR, "Reached RBDR desc limit, reduce nr desc "
"VF%d", nic->vf_id);
return -ENOMEM;
}
/* Enable qset */
ret = nicvf_qset_config(nic);
if (ret) {
PMD_INIT_LOG(ERR, "Failed to enable qset %d VF%d", ret,
nic->vf_id);
return ret;
}
/* Allocate RBDR and RBDR ring desc */
nb_rbdr_desc = nicvf_qsize_rbdr_roundup(total_rxq_desc);
ret = nicvf_qset_rbdr_alloc(dev, nic, nb_rbdr_desc, rbdrsz);
if (ret) {
PMD_INIT_LOG(ERR, "Failed to allocate memory for rbdr alloc "
"VF%d", nic->vf_id);
goto qset_reclaim;
}
/* Enable and configure RBDR registers */
ret = nicvf_qset_rbdr_config(nic, 0);
if (ret) {
PMD_INIT_LOG(ERR, "Failed to configure rbdr %d VF%d", ret,
nic->vf_id);
goto qset_rbdr_free;
}
/* Fill rte_mempool buffers in RBDR pool and precharge it */
ret = nicvf_qset_rbdr_precharge(dev, nic, 0, rbdr_rte_mempool_get,
total_rxq_desc);
if (ret) {
PMD_INIT_LOG(ERR, "Failed to fill rbdr %d VF%d", ret,
nic->vf_id);
goto qset_rbdr_reclaim;
}
PMD_DRV_LOG(INFO, "Filled %d out of %d entries in RBDR VF%d",
nic->rbdr->tail, nb_rbdr_desc, nic->vf_id);
/* Configure VLAN Strip */
vlan_strip = !!(dev->data->dev_conf.rxmode.offloads &
DEV_RX_OFFLOAD_VLAN_STRIP);
nicvf_vlan_hw_strip(nic, vlan_strip);
/* Based on the packet type(IPv4 or IPv6), the nicvf HW aligns L3 data
* to the 64bit memory address.
* The alignment creates a hole in mbuf(between the end of headroom and
* packet data start). The new revision of the HW provides an option to
* disable the L3 alignment feature and make mbuf layout looks
* more like other NICs. For better application compatibility, disabling
* l3 alignment feature on the hardware revisions it supports
*/
nicvf_apad_config(nic, false);
/* Get queue ranges for this VF */
nicvf_tx_range(dev, nic, &tx_start, &tx_end);
/* Configure TX queues */
for (qidx = tx_start; qidx <= tx_end; qidx++) {
ret = nicvf_vf_start_tx_queue(dev, nic,
qidx % MAX_SND_QUEUES_PER_QS);
if (ret)
goto start_txq_error;
}
/* Configure RX queues */
for (qidx = rx_start; qidx <= rx_end; qidx++) {
ret = nicvf_vf_start_rx_queue(dev, nic,
qidx % MAX_RCV_QUEUES_PER_QS);
if (ret)
goto start_rxq_error;
}
if (!nic->sqs_mode) {
/* Configure CPI algorithm */
ret = nicvf_configure_cpi(dev);
if (ret)
goto start_txq_error;
ret = nicvf_mbox_get_rss_size(nic);
if (ret) {
PMD_INIT_LOG(ERR, "Failed to get rss table size");
goto qset_rss_error;
}
/* Configure RSS */
ret = nicvf_configure_rss(dev);
if (ret)
goto qset_rss_error;
}
/* Done; Let PF make the BGX's RX and TX switches to ON position */
nicvf_mbox_cfg_done(nic);
return 0;
qset_rss_error:
nicvf_rss_term(nic);
start_rxq_error:
for (qidx = rx_start; qidx <= rx_end; qidx++)
nicvf_vf_stop_rx_queue(dev, nic, qidx % MAX_RCV_QUEUES_PER_QS);
start_txq_error:
for (qidx = tx_start; qidx <= tx_end; qidx++)
nicvf_vf_stop_tx_queue(dev, nic, qidx % MAX_SND_QUEUES_PER_QS);
qset_rbdr_reclaim:
nicvf_qset_rbdr_reclaim(nic, 0);
nicvf_rbdr_release_mbufs(dev, nic);
qset_rbdr_free:
if (nic->rbdr) {
rte_free(nic->rbdr);
nic->rbdr = NULL;
}
qset_reclaim:
nicvf_qset_reclaim(nic);
return ret;
}
static int
nicvf_dev_start(struct rte_eth_dev *dev)
{
uint16_t qidx;
int ret;
size_t i;
struct nicvf *nic = nicvf_pmd_priv(dev);
struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
uint16_t mtu;
uint32_t buffsz = 0, rbdrsz = 0;
struct rte_pktmbuf_pool_private *mbp_priv;
struct nicvf_rxq *rxq;
PMD_INIT_FUNC_TRACE();
/* This function must be called for a primary device */
assert_primary(nic);
/* Validate RBDR buff size */
for (qidx = 0; qidx < dev->data->nb_rx_queues; qidx++) {
rxq = dev->data->rx_queues[qidx];
mbp_priv = rte_mempool_get_priv(rxq->pool);
buffsz = mbp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM;
if (buffsz % 128) {
PMD_INIT_LOG(ERR, "rxbuf size must be multiply of 128");
return -EINVAL;
}
if (rbdrsz == 0)
rbdrsz = buffsz;
if (rbdrsz != buffsz) {
PMD_INIT_LOG(ERR, "buffsz not same, qidx=%d (%d/%d)",
qidx, rbdrsz, buffsz);
return -EINVAL;
}
}
/* Configure loopback */
ret = nicvf_loopback_config(nic, dev->data->dev_conf.lpbk_mode);
if (ret) {
PMD_INIT_LOG(ERR, "Failed to configure loopback %d", ret);
return ret;
}
/* Reset all statistics counters attached to this port */
ret = nicvf_mbox_reset_stat_counters(nic, 0x3FFF, 0x1F, 0xFFFF, 0xFFFF);
if (ret) {
PMD_INIT_LOG(ERR, "Failed to reset stat counters %d", ret);
return ret;
}
/* Setup scatter mode if needed by jumbo */
if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
2 * VLAN_TAG_SIZE > buffsz)
dev->data->scattered_rx = 1;
if ((rx_conf->offloads & DEV_RX_OFFLOAD_SCATTER) != 0)
dev->data->scattered_rx = 1;
/* Setup MTU based on max_rx_pkt_len or default */
mtu = dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME ?
dev->data->dev_conf.rxmode.max_rx_pkt_len
- ETHER_HDR_LEN : ETHER_MTU;
if (nicvf_dev_set_mtu(dev, mtu)) {
PMD_INIT_LOG(ERR, "Failed to set default mtu size");
return -EBUSY;
}
ret = nicvf_vf_start(dev, nic, rbdrsz);
if (ret != 0)
return ret;
for (i = 0; i < nic->sqs_count; i++) {
assert(nic->snicvf[i]);
ret = nicvf_vf_start(dev, nic->snicvf[i], rbdrsz);
if (ret != 0)
return ret;
}
/* Configure callbacks based on scatter mode */
nicvf_set_tx_function(dev);
nicvf_set_rx_function(dev);
return 0;
}
static void
nicvf_dev_stop_cleanup(struct rte_eth_dev *dev, bool cleanup)
{
size_t i;
int ret;
struct nicvf *nic = nicvf_pmd_priv(dev);
PMD_INIT_FUNC_TRACE();
/* Teardown secondary vf first */
for (i = 0; i < nic->sqs_count; i++) {
if (!nic->snicvf[i])
continue;
nicvf_vf_stop(dev, nic->snicvf[i], cleanup);
}
/* Stop the primary VF now */
nicvf_vf_stop(dev, nic, cleanup);
/* Disable loopback */
ret = nicvf_loopback_config(nic, 0);
if (ret)
PMD_INIT_LOG(ERR, "Failed to disable loopback %d", ret);
/* Reclaim CPI configuration */
ret = nicvf_mbox_config_cpi(nic, 0);
if (ret)
PMD_INIT_LOG(ERR, "Failed to reclaim CPI config %d", ret);
}
static void
nicvf_dev_stop(struct rte_eth_dev *dev)
{
PMD_INIT_FUNC_TRACE();
nicvf_dev_stop_cleanup(dev, false);
}
static void
nicvf_vf_stop(struct rte_eth_dev *dev, struct nicvf *nic, bool cleanup)
{
int ret;
uint16_t qidx;
uint16_t tx_start, tx_end;
uint16_t rx_start, rx_end;
PMD_INIT_FUNC_TRACE();
if (cleanup) {
/* Let PF make the BGX's RX and TX switches to OFF position */
nicvf_mbox_shutdown(nic);
}
/* Disable VLAN Strip */
nicvf_vlan_hw_strip(nic, 0);
/* Get queue ranges for this VF */
nicvf_tx_range(dev, nic, &tx_start, &tx_end);
for (qidx = tx_start; qidx <= tx_end; qidx++)
nicvf_vf_stop_tx_queue(dev, nic, qidx % MAX_SND_QUEUES_PER_QS);
/* Get queue ranges for this VF */
nicvf_rx_range(dev, nic, &rx_start, &rx_end);
/* Reclaim rq */
for (qidx = rx_start; qidx <= rx_end; qidx++)
nicvf_vf_stop_rx_queue(dev, nic, qidx % MAX_RCV_QUEUES_PER_QS);
/* Reclaim RBDR */
ret = nicvf_qset_rbdr_reclaim(nic, 0);
if (ret)
PMD_INIT_LOG(ERR, "Failed to reclaim RBDR %d", ret);
/* Move all charged buffers in RBDR back to pool */
if (nic->rbdr != NULL)
nicvf_rbdr_release_mbufs(dev, nic);
/* Disable qset */
ret = nicvf_qset_reclaim(nic);
if (ret)
PMD_INIT_LOG(ERR, "Failed to disable qset %d", ret);
/* Disable all interrupts */
nicvf_disable_all_interrupts(nic);
/* Free RBDR SW structure */
if (nic->rbdr) {
rte_free(nic->rbdr);
nic->rbdr = NULL;
}
}
static void
nicvf_dev_close(struct rte_eth_dev *dev)
{
size_t i;
struct nicvf *nic = nicvf_pmd_priv(dev);
PMD_INIT_FUNC_TRACE();
nicvf_dev_stop_cleanup(dev, true);
nicvf_periodic_alarm_stop(nicvf_interrupt, dev);
for (i = 0; i < nic->sqs_count; i++) {
if (!nic->snicvf[i])
continue;
nicvf_periodic_alarm_stop(nicvf_vf_interrupt, nic->snicvf[i]);
}
}
static int
nicvf_request_sqs(struct nicvf *nic)
{
size_t i;
assert_primary(nic);
assert(nic->sqs_count > 0);
assert(nic->sqs_count <= MAX_SQS_PER_VF);
/* Set no of Rx/Tx queues in each of the SQsets */
for (i = 0; i < nic->sqs_count; i++) {
if (nicvf_svf_empty())
rte_panic("Cannot assign sufficient number of "
"secondary queues to primary VF%" PRIu8 "\n",
nic->vf_id);
nic->snicvf[i] = nicvf_svf_pop();
nic->snicvf[i]->sqs_id = i;
}
return nicvf_mbox_request_sqs(nic);
}
static int
nicvf_dev_configure(struct rte_eth_dev *dev)
{
struct rte_eth_dev_data *data = dev->data;
struct rte_eth_conf *conf = &data->dev_conf;
struct rte_eth_rxmode *rxmode = &conf->rxmode;
struct rte_eth_txmode *txmode = &conf->txmode;
struct nicvf *nic = nicvf_pmd_priv(dev);
uint8_t cqcount;
PMD_INIT_FUNC_TRACE();
if (!rte_eal_has_hugepages()) {
PMD_INIT_LOG(INFO, "Huge page is not configured");
return -EINVAL;
}
ethdev: new Rx/Tx offloads API This patch check if a input requested offloading is valid or not. Any reuqested offloading must be supported in the device capabilities. Any offloading is disabled by default if it is not set in the parameter dev_conf->[rt]xmode.offloads to rte_eth_dev_configure() and [rt]x_conf->offloads to rte_eth_[rt]x_queue_setup(). If any offloading is enabled in rte_eth_dev_configure() by application, it is enabled on all queues no matter whether it is per-queue or per-port type and no matter whether it is set or cleared in [rt]x_conf->offloads to rte_eth_[rt]x_queue_setup(). If a per-queue offloading hasn't be enabled in rte_eth_dev_configure(), it can be enabled or disabled for individual queue in ret_eth_[rt]x_queue_setup(). A new added offloading is the one which hasn't been enabled in rte_eth_dev_configure() and is reuqested to be enabled in rte_eth_[rt]x_queue_setup(), it must be per-queue type, otherwise trigger an error log. The underlying PMD must be aware that the requested offloadings to PMD specific queue_setup() function only carries those new added offloadings of per-queue type. This patch can make above such checking in a common way in rte_ethdev layer to avoid same checking in underlying PMD. This patch assumes that all PMDs in 18.05-rc2 have already converted to offload API defined in 17.11 . It also assumes that all PMDs can return correct offloading capabilities in rte_eth_dev_infos_get(). In the beginning of [rt]x_queue_setup() of underlying PMD, add offloads = [rt]xconf->offloads | dev->data->dev_conf.[rt]xmode.offloads; to keep same as offload API defined in 17.11 to avoid upper application broken due to offload API change. PMD can use the info that input [rt]xconf->offloads only carry the new added per-queue offloads to do some optimization or some code change on base of this patch. Signed-off-by: Wei Dai <wei.dai@intel.com> Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com> Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
2018-05-10 11:56:55 +00:00
if ((rxmode->offloads & DEV_RX_OFFLOAD_CRC_STRIP) == 0) {
PMD_INIT_LOG(NOTICE, "Can't disable hw crc strip");
rxmode->offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
}
if (txmode->mq_mode) {
PMD_INIT_LOG(INFO, "Tx mq_mode DCB or VMDq not supported");
return -EINVAL;
}
if (rxmode->mq_mode != ETH_MQ_RX_NONE &&
rxmode->mq_mode != ETH_MQ_RX_RSS) {
PMD_INIT_LOG(INFO, "Unsupported rx qmode %d", rxmode->mq_mode);
return -EINVAL;
}
if (rxmode->split_hdr_size) {
PMD_INIT_LOG(INFO, "Rxmode does not support split header");
return -EINVAL;
}
if (conf->link_speeds & ETH_LINK_SPEED_FIXED) {
PMD_INIT_LOG(INFO, "Setting link speed/duplex not supported");
return -EINVAL;
}
if (conf->dcb_capability_en) {
PMD_INIT_LOG(INFO, "DCB enable not supported");
return -EINVAL;
}
if (conf->fdir_conf.mode != RTE_FDIR_MODE_NONE) {
PMD_INIT_LOG(INFO, "Flow director not supported");
return -EINVAL;
}
assert_primary(nic);
NICVF_STATIC_ASSERT(MAX_RCV_QUEUES_PER_QS == MAX_SND_QUEUES_PER_QS);
cqcount = RTE_MAX(data->nb_tx_queues, data->nb_rx_queues);
if (cqcount > MAX_RCV_QUEUES_PER_QS) {
nic->sqs_count = RTE_ALIGN_CEIL(cqcount, MAX_RCV_QUEUES_PER_QS);
nic->sqs_count = (nic->sqs_count / MAX_RCV_QUEUES_PER_QS) - 1;
} else {
nic->sqs_count = 0;
}
assert(nic->sqs_count <= MAX_SQS_PER_VF);
if (nic->sqs_count > 0) {
if (nicvf_request_sqs(nic)) {
rte_panic("Cannot assign sufficient number of "
"secondary queues to PORT%d VF%" PRIu8 "\n",
dev->data->port_id, nic->vf_id);
}
}
PMD_INIT_LOG(DEBUG, "Configured ethdev port%d hwcap=0x%" PRIx64,
dev->data->port_id, nicvf_hw_cap(nic));
return 0;
}
/* Initialize and register driver with DPDK Application */
static const struct eth_dev_ops nicvf_eth_dev_ops = {
.dev_configure = nicvf_dev_configure,
.dev_start = nicvf_dev_start,
.dev_stop = nicvf_dev_stop,
.link_update = nicvf_dev_link_update,
.dev_close = nicvf_dev_close,
.stats_get = nicvf_dev_stats_get,
.stats_reset = nicvf_dev_stats_reset,
.promiscuous_enable = nicvf_dev_promisc_enable,
.dev_infos_get = nicvf_dev_info_get,
.dev_supported_ptypes_get = nicvf_dev_supported_ptypes_get,
.mtu_set = nicvf_dev_set_mtu,
.reta_update = nicvf_dev_reta_update,
.reta_query = nicvf_dev_reta_query,
.rss_hash_update = nicvf_dev_rss_hash_update,
.rss_hash_conf_get = nicvf_dev_rss_hash_conf_get,
.rx_queue_start = nicvf_dev_rx_queue_start,
.rx_queue_stop = nicvf_dev_rx_queue_stop,
.tx_queue_start = nicvf_dev_tx_queue_start,
.tx_queue_stop = nicvf_dev_tx_queue_stop,
.rx_queue_setup = nicvf_dev_rx_queue_setup,
.rx_queue_release = nicvf_dev_rx_queue_release,
.rx_queue_count = nicvf_dev_rx_queue_count,
.tx_queue_setup = nicvf_dev_tx_queue_setup,
.tx_queue_release = nicvf_dev_tx_queue_release,
.get_reg = nicvf_dev_get_regs,
};
static inline int
nicvf_set_first_skip(struct rte_eth_dev *dev)
{
int bytes_to_skip = 0;
int ret = 0;
unsigned int i;
struct rte_kvargs *kvlist;
static const char *const skip[] = {
SKIP_DATA_BYTES,
NULL};
struct nicvf *nic = nicvf_pmd_priv(dev);
if (!dev->device->devargs) {
nicvf_first_skip_config(nic, 0);
return ret;
}
kvlist = rte_kvargs_parse(dev->device->devargs->args, skip);
if (!kvlist)
return -EINVAL;
if (kvlist->count == 0)
goto exit;
for (i = 0; i != kvlist->count; ++i) {
const struct rte_kvargs_pair *pair = &kvlist->pairs[i];
if (!strcmp(pair->key, SKIP_DATA_BYTES))
bytes_to_skip = atoi(pair->value);
}
/*128 bytes amounts to one cache line*/
if (bytes_to_skip >= 0 && bytes_to_skip < 128) {
if (!(bytes_to_skip % 8)) {
nicvf_first_skip_config(nic, (bytes_to_skip / 8));
nic->skip_bytes = bytes_to_skip;
goto kvlist_free;
} else {
PMD_INIT_LOG(ERR, "skip_data_bytes should be multiple of 8");
ret = -EINVAL;
goto exit;
}
} else {
PMD_INIT_LOG(ERR, "skip_data_bytes should be less than 128");
ret = -EINVAL;
goto exit;
}
exit:
nicvf_first_skip_config(nic, 0);
kvlist_free:
rte_kvargs_free(kvlist);
return ret;
}
static int
nicvf_eth_dev_init(struct rte_eth_dev *eth_dev)
{
int ret;
struct rte_pci_device *pci_dev;
struct nicvf *nic = nicvf_pmd_priv(eth_dev);
PMD_INIT_FUNC_TRACE();
eth_dev->dev_ops = &nicvf_eth_dev_ops;
/* For secondary processes, the primary has done all the work */
if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
if (nic) {
/* Setup callbacks for secondary process */
nicvf_set_tx_function(eth_dev);
nicvf_set_rx_function(eth_dev);
return 0;
} else {
/* If nic == NULL than it is secondary function
* so ethdev need to be released by caller */
return ENOTSUP;
}
}
pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
rte_eth_copy_pci_info(eth_dev, pci_dev);
nic->device_id = pci_dev->id.device_id;
nic->vendor_id = pci_dev->id.vendor_id;
nic->subsystem_device_id = pci_dev->id.subsystem_device_id;
nic->subsystem_vendor_id = pci_dev->id.subsystem_vendor_id;
PMD_INIT_LOG(DEBUG, "nicvf: device (%x:%x) %u:%u:%u:%u",
pci_dev->id.vendor_id, pci_dev->id.device_id,
pci_dev->addr.domain, pci_dev->addr.bus,
pci_dev->addr.devid, pci_dev->addr.function);
nic->reg_base = (uintptr_t)pci_dev->mem_resource[0].addr;
if (!nic->reg_base) {
PMD_INIT_LOG(ERR, "Failed to map BAR0");
ret = -ENODEV;
goto fail;
}
nicvf_disable_all_interrupts(nic);
ret = nicvf_periodic_alarm_start(nicvf_interrupt, eth_dev);
if (ret) {
PMD_INIT_LOG(ERR, "Failed to start period alarm");
goto fail;
}
ret = nicvf_mbox_check_pf_ready(nic);
if (ret) {
PMD_INIT_LOG(ERR, "Failed to get ready message from PF");
goto alarm_fail;
} else {
PMD_INIT_LOG(INFO,
"node=%d vf=%d mode=%s sqs=%s loopback_supported=%s",
nic->node, nic->vf_id,
nic->tns_mode == NIC_TNS_MODE ? "tns" : "tns-bypass",
nic->sqs_mode ? "true" : "false",
nic->loopback_supported ? "true" : "false"
);
}
ret = nicvf_base_init(nic);
if (ret) {
PMD_INIT_LOG(ERR, "Failed to execute nicvf_base_init");
goto malloc_fail;
}
if (nic->sqs_mode) {
/* Push nic to stack of secondary vfs */
nicvf_svf_push(nic);
/* Steal nic pointer from the device for further reuse */
eth_dev->data->dev_private = NULL;
nicvf_periodic_alarm_stop(nicvf_interrupt, eth_dev);
ret = nicvf_periodic_alarm_start(nicvf_vf_interrupt, nic);
if (ret) {
PMD_INIT_LOG(ERR, "Failed to start period alarm");
goto fail;
}
/* Detach port by returning positive error number */
return ENOTSUP;
}
eth_dev->data->mac_addrs = rte_zmalloc("mac_addr", ETHER_ADDR_LEN, 0);
if (eth_dev->data->mac_addrs == NULL) {
PMD_INIT_LOG(ERR, "Failed to allocate memory for mac addr");
ret = -ENOMEM;
goto alarm_fail;
}
if (is_zero_ether_addr((struct ether_addr *)nic->mac_addr))
eth_random_addr(&nic->mac_addr[0]);
ether_addr_copy((struct ether_addr *)nic->mac_addr,
&eth_dev->data->mac_addrs[0]);
ret = nicvf_mbox_set_mac_addr(nic, nic->mac_addr);
if (ret) {
PMD_INIT_LOG(ERR, "Failed to set mac addr");
goto malloc_fail;
}
ret = nicvf_set_first_skip(eth_dev);
if (ret) {
PMD_INIT_LOG(ERR, "Failed to configure first skip");
goto malloc_fail;
}
PMD_INIT_LOG(INFO, "Port %d (%x:%x) mac=%02x:%02x:%02x:%02x:%02x:%02x",
eth_dev->data->port_id, nic->vendor_id, nic->device_id,
nic->mac_addr[0], nic->mac_addr[1], nic->mac_addr[2],
nic->mac_addr[3], nic->mac_addr[4], nic->mac_addr[5]);
return 0;
malloc_fail:
rte_free(eth_dev->data->mac_addrs);
alarm_fail:
nicvf_periodic_alarm_stop(nicvf_interrupt, eth_dev);
fail:
return ret;
}
static const struct rte_pci_id pci_id_nicvf_map[] = {
{
.class_id = RTE_CLASS_ANY_ID,
.vendor_id = PCI_VENDOR_ID_CAVIUM,
.device_id = PCI_DEVICE_ID_THUNDERX_CN88XX_PASS1_NICVF,
.subsystem_vendor_id = PCI_VENDOR_ID_CAVIUM,
.subsystem_device_id = PCI_SUB_DEVICE_ID_CN88XX_PASS1_NICVF,
},
{
.class_id = RTE_CLASS_ANY_ID,
.vendor_id = PCI_VENDOR_ID_CAVIUM,
.device_id = PCI_DEVICE_ID_THUNDERX_NICVF,
.subsystem_vendor_id = PCI_VENDOR_ID_CAVIUM,
.subsystem_device_id = PCI_SUB_DEVICE_ID_CN88XX_PASS2_NICVF,
},
{
.class_id = RTE_CLASS_ANY_ID,
.vendor_id = PCI_VENDOR_ID_CAVIUM,
.device_id = PCI_DEVICE_ID_THUNDERX_NICVF,
.subsystem_vendor_id = PCI_VENDOR_ID_CAVIUM,
.subsystem_device_id = PCI_SUB_DEVICE_ID_CN81XX_NICVF,
},
{
.class_id = RTE_CLASS_ANY_ID,
.vendor_id = PCI_VENDOR_ID_CAVIUM,
.device_id = PCI_DEVICE_ID_THUNDERX_NICVF,
.subsystem_vendor_id = PCI_VENDOR_ID_CAVIUM,
.subsystem_device_id = PCI_SUB_DEVICE_ID_CN83XX_NICVF,
},
{
.vendor_id = 0,
},
};
static int nicvf_eth_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
struct rte_pci_device *pci_dev)
{
return rte_eth_dev_pci_generic_probe(pci_dev, sizeof(struct nicvf),
nicvf_eth_dev_init);
}
static int nicvf_eth_pci_remove(struct rte_pci_device *pci_dev)
{
return rte_eth_dev_pci_generic_remove(pci_dev, NULL);
}
static struct rte_pci_driver rte_nicvf_pmd = {
.id_table = pci_id_nicvf_map,
.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_KEEP_MAPPED_RES |
RTE_PCI_DRV_INTR_LSC,
.probe = nicvf_eth_pci_probe,
.remove = nicvf_eth_pci_remove,
};
RTE_PMD_REGISTER_PCI(net_thunderx, rte_nicvf_pmd);
RTE_PMD_REGISTER_PCI_TABLE(net_thunderx, pci_id_nicvf_map);
RTE_PMD_REGISTER_KMOD_DEP(net_thunderx, "* igb_uio | uio_pci_generic | vfio-pci");
RTE_PMD_REGISTER_PARAM_STRING(net_thunderx, SKIP_DATA_BYTES "=<int>");