diff --git a/doc/guides/prog_guide/link_bonding_poll_mode_drv_lib.rst b/doc/guides/prog_guide/link_bonding_poll_mode_drv_lib.rst index 03baf90d38..46f0296e2d 100644 --- a/doc/guides/prog_guide/link_bonding_poll_mode_drv_lib.rst +++ b/doc/guides/prog_guide/link_bonding_poll_mode_drv_lib.rst @@ -1,5 +1,5 @@ .. BSD LICENSE - Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + Copyright(c) 2010-2015 Intel Corporation. All rights reserved. All rights reserved. Redistribution and use in source and binary forms, with or without @@ -173,7 +173,28 @@ After a slave device is added to a bonded device slave is stopped using ``rte_eth_dev_stop`` and then reconfigured using ``rte_eth_dev_configure`` the RX and TX queues are also reconfigured using ``rte_eth_tx_queue_setup`` / ``rte_eth_rx_queue_setup`` with the parameters use to configure the bonding -device. +device. If RSS is enabled for the bonding device, this mode is also enabled on the new +slave and configured as well. + +Setting the multi-queue mode of a bonding device to RSS makes it fully +RSS-capable, so all slaves are synchronized with its configuration. This mode is +intended to make the RSS configuration of slaves transparent to the client +application implementation. + +The bonding device stores its own version of the RSS settings, i.e. RETA, RSS hash +function and RSS key, which are used to set up its slaves. This allows the meaning +of the RSS configuration of a bonding device to be defined as the desired configuration of the whole bond +(as one unit), without pointing to any particular slave inside it. This is required to ensure +consistency and makes it more error-proof. + +The RSS hash function set for a bonding device is the maximal set of RSS hash functions +supported by all bonded slaves. The RETA size is the GCD of all the slaves' RETA sizes, so +it can be easily used as a pattern providing the expected behavior, even if the slaves' +RETA sizes are different. If the RSS key is not set for the bonded device, it is not +changed on the slaves and the default key of each device is used. 
+ +All settings are managed through the bonding port API and are always propagated +in one direction (from the bonding device to its slaves). Link Status Change Interrupts / Polling ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -207,6 +228,15 @@ these parameters. A bonding device must have a minimum of one slave before the bonding device itself can be started. +To use the dynamic RSS configuration feature of a bonding device effectively, it is +also required that all slaves be RSS-capable and support at least one +common hash function available for each of them. Changing the RSS key is only +possible when all slave devices support the same key size. + +To prevent inconsistency in how slaves process packets, once a device is added +to a bonding device, RSS configuration should be managed through the bonding +device API, and not directly on the slave. + Like all other PMD, all functions exported by a PMD are lock-free functions that are assumed not to be invoked in parallel on different logical cores to work on the same target object. 
diff --git a/doc/guides/rel_notes/release_2_2.rst b/doc/guides/rel_notes/release_2_2.rst
index 0b8a263954..03d93d61f9 100644
--- a/doc/guides/rel_notes/release_2_2.rst
+++ b/doc/guides/rel_notes/release_2_2.rst
@@ -4,6 +4,8 @@ DPDK Release 2.2
 New Features
 ------------
 
+* **Added RSS dynamic configuration to bonding.**
+
 * **Added e1000 Rx interrupt support.**
 
 * **Added igb TSO support for both PF and VF.**
diff --git a/drivers/net/bonding/rte_eth_bond_api.c b/drivers/net/bonding/rte_eth_bond_api.c
index 0681d1adc4..92073dff51 100644
--- a/drivers/net/bonding/rte_eth_bond_api.c
+++ b/drivers/net/bonding/rte_eth_bond_api.c
@@ -273,6 +273,9 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
 	internals->rx_offload_capa = 0;
 	internals->tx_offload_capa = 0;
 
+	/* Initially allow to choose any offload type */
+	internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
+
 	memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
 	memset(internals->slaves, 0, sizeof(internals->slaves));
 
@@ -369,6 +372,11 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
 
 	rte_eth_dev_info_get(slave_port_id, &dev_info);
 
+	/* We need to store slaves reta_size to be able to synchronize RETA for all
+	 * slave devices even if its sizes are different.
+	 */
+	internals->slaves[internals->slave_count].reta_size = dev_info.reta_size;
+
 	if (internals->slave_count < 1) {
 		/* if MAC is not user defined then use MAC of first slave add to
 		 * bonded device */
@@ -382,9 +390,16 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
 		/* Make primary slave */
 		internals->primary_port = slave_port_id;
 
+		/* Inherit queues settings from first slave */
+		internals->nb_rx_queues = slave_eth_dev->data->nb_rx_queues;
+		internals->nb_tx_queues = slave_eth_dev->data->nb_tx_queues;
+
+		internals->reta_size = dev_info.reta_size;
+
 		/* Take the first dev's offload capabilities */
 		internals->rx_offload_capa = dev_info.rx_offload_capa;
 		internals->tx_offload_capa = dev_info.tx_offload_capa;
+		internals->flow_type_rss_offloads = dev_info.flow_type_rss_offloads;
 
 	} else {
 		/* Check slave link properties are supported if props are set,
@@ -403,8 +418,19 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
 		}
 		internals->rx_offload_capa &= dev_info.rx_offload_capa;
 		internals->tx_offload_capa &= dev_info.tx_offload_capa;
+		internals->flow_type_rss_offloads &= dev_info.flow_type_rss_offloads;
+
+		/* RETA size is GCD of all slaves RETA sizes, so, if all sizes will be
+		 * the power of 2, the lower one is GCD
+		 */
+		if (internals->reta_size > dev_info.reta_size)
+			internals->reta_size = dev_info.reta_size;
+
 	}
 
+	bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf &=
+			internals->flow_type_rss_offloads;
+
 	internals->slave_count++;
 
 	/* Update all slave devices MACs*/
@@ -531,6 +557,8 @@ __eth_bond_slave_remove_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
 	if (internals->slave_count == 0) {
 		internals->rx_offload_capa = 0;
 		internals->tx_offload_capa = 0;
+		internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
+		internals->reta_size = 0;
 	}
 	return 0;
 }
diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c
index 5cc637239e..2880f5c182 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -1310,6 +1310,23 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev,
 	if (slave_eth_dev->driver->pci_drv.drv_flags & RTE_PCI_DRV_INTR_LSC)
 		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
 
+	/* If RSS is enabled for bonding, try to enable it for slaves */
+	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
+		if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
+				!= 0) {
+			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
+					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
+			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
+					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
+		} else {
+			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
+		}
+
+		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
+				bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
+		slave_eth_dev->data->dev_conf.rxmode.mq_mode |= ETH_MQ_RX_RSS;
+	}
+
 	/* Configure device */
 	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
 			bonded_eth_dev->data->nb_rx_queues,
@@ -1361,6 +1378,30 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev,
 		return -1;
 	}
 
+	/* If RSS is enabled for bonding, synchronize RETA */
+	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
+		int i;
+		struct bond_dev_private *internals;
+
+		internals = bonded_eth_dev->data->dev_private;
+
+		for (i = 0; i < internals->slave_count; i++) {
+			if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
+				errval = rte_eth_dev_rss_reta_update(
+						slave_eth_dev->data->port_id,
+						&internals->reta_conf[0],
+						internals->slaves[i].reta_size);
+				if (errval != 0) {
+					RTE_LOG(WARNING, PMD,
+							"rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
+							" RSS Configuration for bonding may be inconsistent.\n",
+							slave_eth_dev->data->port_id, errval);
+				}
+				break;
+			}
+		}
+	}
+
 	/* If lsc interrupt is set, check initial slave's link status */
 	if (slave_eth_dev->driver->pci_drv.drv_flags & RTE_PCI_DRV_INTR_LSC)
 		bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
@@ -1596,6 +1637,9 @@ bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 
 	dev_info->rx_offload_capa = internals->rx_offload_capa;
 	dev_info->tx_offload_capa = internals->tx_offload_capa;
+	dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
+
+	dev_info->reta_size = internals->reta_size;
 }
 
 static int
@@ -1977,21 +2021,182 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
 	}
 }
 
+/*
+ * Store a new redirection table (RETA) in the bonding device and push it to
+ * every slave.
+ *
+ * reta_size must match the bonding device's RETA size and span at least one
+ * RTE_RETA_GROUP_SIZE group. Only entries selected by each group's mask are
+ * overwritten. The updated pattern is then repeated across the rest of the
+ * internal table, and each slave is updated using its own stored RETA size.
+ *
+ * Returns 0 on success, -EINVAL for an unusable reta_size, or the first
+ * negative value returned by rte_eth_dev_rss_reta_update() for a slave.
+ */
+static int
+bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
+		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
+{
+	unsigned i, j;
+	int result = 0;
+	int slave_reta_size;
+	unsigned reta_count;
+	struct bond_dev_private *internals = dev->data->dev_private;
+
+	if (reta_size != internals->reta_size)
+		return -EINVAL;
+
+	/* Copy RETA table */
+	reta_count = reta_size / RTE_RETA_GROUP_SIZE;
+
+	/*
+	 * Zero groups would make the fill loop below step by zero and never
+	 * finish; more groups than the internal table holds would overflow it.
+	 */
+	if (reta_count == 0 || reta_count > RTE_DIM(internals->reta_conf))
+		return -EINVAL;
+
+	for (i = 0; i < reta_count; i++) {
+		internals->reta_conf[i].mask = reta_conf[i].mask;
+		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
+			if ((reta_conf[i].mask >> j) & 0x01)
+				internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
+	}
+
+	/* Fill rest of array, clamping the final copy to the table's end */
+	for (; i < RTE_DIM(internals->reta_conf); i += reta_count) {
+		unsigned chunk = RTE_DIM(internals->reta_conf) - i;
+
+		if (chunk > reta_count)
+			chunk = reta_count;
+		memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
+				sizeof(internals->reta_conf[0]) * chunk);
+	}
+
+	/* Propagate RETA over slaves */
+	for (i = 0; i < internals->slave_count; i++) {
+		slave_reta_size = internals->slaves[i].reta_size;
+		result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
+				&internals->reta_conf[0], slave_reta_size);
+		if (result < 0)
+			return result;
+	}
+
+	return 0;
+}
+
+/*
+ * Copy the bonding device's stored RETA into reta_conf.
+ *
+ * Only entries whose bit is set in the caller-supplied group masks are
+ * written. Returns 0 on success or -EINVAL when reta_size differs from the
+ * bonding device's RETA size.
+ */
+static int
+bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
+		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
+{
+	int i, j;
+	struct bond_dev_private *internals = dev->data->dev_private;
+
+	if (reta_size != internals->reta_size)
+		return -EINVAL;
+
+	/* Copy RETA table */
+	for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
+		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
+			if ((reta_conf[i].mask >> j) & 0x01)
+				reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
+
+	return 0;
+}
+
+/*
+ * Update the RSS hash configuration of the bonding device and its slaves.
+ *
+ * The requested hash functions are masked with the set supported by the
+ * bonding device; a non-empty result is recorded in the device configuration.
+ * A supplied key that fits the internal buffer is stored (a zero length is
+ * treated as 40 bytes). The masked configuration is then applied to each
+ * slave in turn.
+ *
+ * Returns 0 on success or the first negative value returned by
+ * rte_eth_dev_rss_hash_update() for a slave.
+ */
+static int
+bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
+		struct rte_eth_rss_conf *rss_conf)
+{
+	int i, result = 0;
+	struct bond_dev_private *internals = dev->data->dev_private;
+	struct rte_eth_rss_conf bond_rss_conf;
+
+	memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
+
+	bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
+
+	if (bond_rss_conf.rss_hf != 0)
+		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
+
+	/* "<=": a key that exactly fills the buffer must be stored as well */
+	if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <=
+			sizeof(internals->rss_key)) {
+		if (bond_rss_conf.rss_key_len == 0)
+			bond_rss_conf.rss_key_len = 40;
+		internals->rss_key_len = bond_rss_conf.rss_key_len;
+		memcpy(internals->rss_key, bond_rss_conf.rss_key,
+				internals->rss_key_len);
+	}
+
+	for (i = 0; i < internals->slave_count; i++) {
+		result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
+				&bond_rss_conf);
+		if (result < 0)
+			return result;
+	}
+
+	return 0;
+}
+
+/*
+ * Report the RSS hash configuration held by the bonding device.
+ *
+ * Fills in the hash function set and stored key length, and copies the stored
+ * key when the caller provides a key buffer. Always returns 0.
+ */
+static int
+bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
+		struct rte_eth_rss_conf *rss_conf)
+{
+	struct bond_dev_private *internals = dev->data->dev_private;
+
+	rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
+	rss_conf->rss_key_len = internals->rss_key_len;
+	if (rss_conf->rss_key)
+		memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
+
+	return 0;
+}
+
 struct eth_dev_ops default_dev_ops = {
-		.dev_start = bond_ethdev_start,
-		.dev_stop = bond_ethdev_stop,
-		.dev_close = bond_ethdev_close,
-		.dev_configure = bond_ethdev_configure,
-		.dev_infos_get = bond_ethdev_info,
-		.rx_queue_setup = bond_ethdev_rx_queue_setup,
-		.tx_queue_setup = bond_ethdev_tx_queue_setup,
-		.rx_queue_release = bond_ethdev_rx_queue_release,
-		.tx_queue_release = bond_ethdev_tx_queue_release,
-		.link_update = bond_ethdev_link_update,
-		.stats_get = bond_ethdev_stats_get,
-		.stats_reset = bond_ethdev_stats_reset,
-		.promiscuous_enable = bond_ethdev_promiscuous_enable,
-		.promiscuous_disable = bond_ethdev_promiscuous_disable
+	.dev_start            = bond_ethdev_start,
+	.dev_stop             = bond_ethdev_stop,
+	.dev_close            = bond_ethdev_close,
+	.dev_configure        = bond_ethdev_configure,
+	.dev_infos_get        = bond_ethdev_info,
+	.rx_queue_setup       = bond_ethdev_rx_queue_setup,
+	.tx_queue_setup       = bond_ethdev_tx_queue_setup,
+	.rx_queue_release     = bond_ethdev_rx_queue_release,
+	.tx_queue_release     = bond_ethdev_tx_queue_release,
+	.link_update          = bond_ethdev_link_update,
+	.stats_get            = bond_ethdev_stats_get,
+	.stats_reset          = bond_ethdev_stats_reset,
+	.promiscuous_enable   = bond_ethdev_promiscuous_enable,
+	.promiscuous_disable  = bond_ethdev_promiscuous_disable,
+	.reta_update          = bond_ethdev_rss_reta_update,
+	.reta_query           = bond_ethdev_rss_reta_query,
+	.rss_hash_update      = bond_ethdev_rss_hash_update,
+	.rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get
 };
 
 static int
@@ -2090,6 +2295,28 @@ bond_ethdev_configure(struct rte_eth_dev *dev)
 	int arg_count;
 	uint8_t port_id = dev - rte_eth_devices;
 
+	static const uint8_t default_rss_key[40] = {
+		0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
+		0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
+		0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
+		0xBE, 0xAC, 0x01, 0xFA
+	};
+
+	unsigned i, j;
+
+	/* If RSS is enabled, fill table and key with default values */
+	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
+		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
+		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
+		memcpy(internals->rss_key, default_rss_key, 40);
+
+		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
+			internals->reta_conf[i].mask = ~0LL;
+			for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
+				internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
+		}
+	}
+
 	/*
 	 * if no kvlist, it means that this bonded device has been created
 	 * through the bonding api.
diff --git a/drivers/net/bonding/rte_eth_bond_private.h b/drivers/net/bonding/rte_eth_bond_private.h
index 038bca64ff..e7af809991 100644
--- a/drivers/net/bonding/rte_eth_bond_private.h
+++ b/drivers/net/bonding/rte_eth_bond_private.h
@@ -103,6 +103,8 @@ struct bond_slave_details {
 	uint8_t last_link_status;
 	/**< Port Id of slave eth_dev */
 	struct ether_addr persisted_mac_addr;
+
+	uint16_t reta_size;
 };
 
 
@@ -155,6 +157,16 @@ struct bond_dev_private {
 	uint32_t rx_offload_capa;            /** Rx offload capability */
 	uint32_t tx_offload_capa;            /** Tx offload capability */
 
+	/** Bit mask of RSS offloads, the bit offset also means flow type */
+	uint64_t flow_type_rss_offloads;
+
+	uint16_t reta_size;
+	struct rte_eth_rss_reta_entry64 reta_conf[ETH_RSS_RETA_SIZE_512 /
+			RTE_RETA_GROUP_SIZE];
+
+	uint8_t rss_key[52];				/**< 52-byte hash key buffer. */
+	uint8_t rss_key_len;				/**< hash key length in bytes. */
+
 	struct rte_kvargs *kvlist;
 	uint8_t slave_update_idx;
 };