net/mlx5: retry on link update failure

When called without waiting for completion, mlx5_link_update returns
immediately if its call for retrieving the link status fails with EAGAIN.
This is too harsh on busy systems where a first call fails with EAGAIN
from time to time.
This patch adds a (very limited) retry in such cases in order to allow
retrieving the link status.

Signed-off-by: Moti Haimovsky <motih@mellanox.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
This commit is contained in:
Moti Haimovsky 2019-10-16 10:34:03 +03:00 committed by Ferruh Yigit
parent f3d0c07b09
commit 06ee157848
2 changed files with 6 additions and 2 deletions

View File

@ -105,6 +105,9 @@
/*
 * Timeout in seconds to get a valid link status.
 * mlx5_link_update() compares the time elapsed since it was entered
 * against this value while the link query keeps returning -EAGAIN.
 */
#define MLX5_LINK_STATUS_TIMEOUT 10
/*
 * Number of times to retry retrieving the physical link information.
 * Used as the initial value of the retry counter in mlx5_link_update(),
 * which allows the query to be repeated on -EAGAIN even when the caller
 * did not ask to wait for completion.
 */
#define MLX5_GET_LINK_STATUS_RETRY_COUNT 3
/* Maximum number of UAR pages used by a port,
* These are the size and mask for an array of mutexes used to synchronize
* the access to port's UARs on platforms that do not support 64 bit writes.

View File

@ -999,6 +999,7 @@ mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
int ret;
struct rte_eth_link dev_link;
time_t start_time = time(NULL);
int retry = MLX5_GET_LINK_STATUS_RETRY_COUNT;
do {
ret = mlx5_link_update_unlocked_gs(dev, &dev_link);
@ -1007,7 +1008,7 @@ mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
if (ret == 0)
break;
/* Handle wait to complete situation. */
if (wait_to_complete && ret == -EAGAIN) {
if ((wait_to_complete || retry) && ret == -EAGAIN) {
if (abs((int)difftime(time(NULL), start_time)) <
MLX5_LINK_STATUS_TIMEOUT) {
usleep(0);
@ -1019,7 +1020,7 @@ mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
} else if (ret < 0) {
return ret;
}
} while (wait_to_complete);
} while (wait_to_complete || retry-- > 0);
ret = !!memcmp(&dev->data->dev_link, &dev_link,
sizeof(struct rte_eth_link));
dev->data->dev_link = dev_link;