diff --git a/doc/guides/nics/mlx4.rst b/doc/guides/nics/mlx4.rst
index cd34838f41..c8a02be4dd 100644
--- a/doc/guides/nics/mlx4.rst
+++ b/doc/guides/nics/mlx4.rst
@@ -119,6 +119,17 @@ Run-time configuration
   times for additional ports. All ports are probed by default if left
   unspecified.
 
+- ``mr_ext_memseg_en`` parameter [int]
+
+  A nonzero value enables extending memsegs when registering DMA memory. When
+  enabled, the number of entries in the MR (Memory Region) lookup table on the
+  datapath is minimized, which benefits performance. On the other hand, it
+  worsens memory utilization because the registered memory is pinned by the
+  kernel driver. Even if a page in the extended chunk is freed, it does not
+  become reusable until the entire chunk is freed.
+
+  Enabled by default.
+
 Kernel module parameters
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 315640a6d7..252658fc6a 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -66,11 +66,14 @@ struct mlx4_conf {
 		uint32_t present; /**< Bit-field for existing ports. */
 		uint32_t enabled; /**< Bit-field for user-enabled ports. */
 	} ports;
+	int mr_ext_memseg_en;
+	/** Whether memseg should be extended for MR creation. */
 };
 
 /* Available parameters list. */
 const char *pmd_mlx4_init_params[] = {
 	MLX4_PMD_PORT_KVARG,
+	MLX4_MR_EXT_MEMSEG_EN_KVARG,
 	NULL,
 };
 
@@ -509,6 +512,8 @@ mlx4_arg_parse(const char *key, const char *val, struct mlx4_conf *conf)
 			return -rte_errno;
 		}
 		conf->ports.enabled |= 1 << tmp;
+	} else if (strcmp(MLX4_MR_EXT_MEMSEG_EN_KVARG, key) == 0) {
+		conf->mr_ext_memseg_en = !!tmp;
 	} else {
 		rte_errno = EINVAL;
 		WARN("%s: unknown parameter", key);
@@ -544,10 +549,10 @@ mlx4_args(struct rte_devargs *devargs, struct mlx4_conf *conf)
 	}
 	/* Process parameters. */
 	for (i = 0; pmd_mlx4_init_params[i]; ++i) {
-		arg_count = rte_kvargs_count(kvlist, MLX4_PMD_PORT_KVARG);
+		arg_count = rte_kvargs_count(kvlist, pmd_mlx4_init_params[i]);
 		while (arg_count-- > 0) {
 			ret = rte_kvargs_process(kvlist,
-						 MLX4_PMD_PORT_KVARG,
+						 pmd_mlx4_init_params[i],
 						 (int (*)(const char *,
 							  const char *,
 							  void *))
@@ -876,6 +881,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 	struct ibv_device_attr_ex device_attr_ex;
 	struct mlx4_conf conf = {
 		.ports.present = 0,
+		.mr_ext_memseg_en = 1,
 	};
 	unsigned int vf;
 	int i;
@@ -1100,6 +1106,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 				       device_attr_ex.tso_caps.max_tso;
 			DEBUG("TSO is %ssupported",
 			      priv->tso ? "" : "not ");
+			priv->mr_ext_memseg_en = conf.mr_ext_memseg_en;
 			/* Configure the first MAC address by default. */
 			err = mlx4_get_mac(priv, &mac.addr_bytes);
 			if (err) {
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 1a7b1fb541..4ff98d772b 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -53,6 +53,9 @@
 /** Port parameter. */
 #define MLX4_PMD_PORT_KVARG "port"
 
+/** Enable extending memsegs when creating an MR. */
+#define MLX4_MR_EXT_MEMSEG_EN_KVARG "mr_ext_memseg_en"
+
 /* Reserved address space for UAR mapping. */
 #define MLX4_UAR_SIZE (1ULL << (sizeof(uintptr_t) * 4))
 
@@ -165,6 +168,8 @@ struct mlx4_priv {
 	uint32_t hw_csum_l2tun:1; /**< Checksum support for L2 tunnels. */
 	uint32_t hw_fcs_strip:1; /**< FCS stripping toggling is supported. */
 	uint32_t tso:1; /**< Transmit segmentation offload is supported. */
+	uint32_t mr_ext_memseg_en:1;
+	/** Whether memseg should be extended for MR creation. */
 	uint32_t tso_max_payload_sz; /**< Max supported TSO payload size. */
 	uint32_t hw_rss_max_qps; /**< Max Rx Queues supported by RSS. */
 	uint64_t hw_rss_sup; /**< Supported RSS hash fields (Verbs format). */
diff --git a/drivers/net/mlx4/mlx4_mr.c b/drivers/net/mlx4/mlx4_mr.c
index 0ba55fda04..6db917a092 100644
--- a/drivers/net/mlx4/mlx4_mr.c
+++ b/drivers/net/mlx4/mlx4_mr.c
@@ -580,14 +580,24 @@ mlx4_mr_create(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry,
 	 */
 	mlx4_mr_garbage_collect(dev);
 	/*
-	 * Find out a contiguous virtual address chunk in use, to which the
-	 * given address belongs, in order to register maximum range. In the
-	 * best case where mempools are not dynamically recreated and
+	 * If enabled, find out a contiguous virtual address chunk in use, to
+	 * which the given address belongs, in order to register maximum range.
+	 * In the best case where mempools are not dynamically recreated and
 	 * '--socket-mem' is specified as an EAL option, it is very likely to
 	 * have only one MR(LKey) per a socket and per a hugepage-size even
-	 * though the system memory is highly fragmented.
+	 * though the system memory is highly fragmented. As the whole memory
+	 * chunk will be pinned by the kernel, it can't be reused unless the
+	 * entire chunk is freed from EAL.
+	 *
+	 * If disabled, just register one memseg (page). Then, memory
+	 * consumption will be minimized but it may drop performance if there
+	 * are many MRs to look up on the datapath.
 	 */
-	if (!rte_memseg_contig_walk(mr_find_contig_memsegs_cb, &data)) {
+	if (!priv->mr_ext_memseg_en) {
+		data.msl = rte_mem_virt2memseg_list((void *)addr);
+		data.start = RTE_ALIGN_FLOOR(addr, data.msl->page_sz);
+		data.end = data.start + data.msl->page_sz;
+	} else if (!rte_memseg_contig_walk(mr_find_contig_memsegs_cb, &data)) {
 		WARN("port %u unable to find virtually contiguous"
 		     " chunk for address (%p)."
 		     " rte_memseg_contig_walk() failed.",
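
Illustrative usage note (not part of the patch): ``mr_ext_memseg_en`` is a
device argument, so it is consumed through the regular EAL device arguments at
probe time. The sketch below is a minimal example, assuming a hypothetical PCI
address (0000:03:00.0) and the ``-w`` (PCI whitelist) EAL option of this DPDK
generation; adjust both to the actual setup.

#include <stdlib.h>
#include <rte_eal.h>

int
main(void)
{
	/*
	 * Hypothetical EAL arguments: probe one mlx4 device with memseg
	 * extension disabled, so each MR covers only a single page instead
	 * of the whole contiguous chunk (less pinned memory, but more MR
	 * lookup entries on the datapath).
	 */
	char *eal_args[] = {
		"app",
		"-w", "0000:03:00.0,mr_ext_memseg_en=0",
	};
	int n = sizeof(eal_args) / sizeof(eal_args[0]);

	if (rte_eal_init(n, eal_args) < 0)
		exit(EXIT_FAILURE);
	/* ... normal port configuration continues here ... */
	return 0;
}

The same argument can be passed on an application command line, e.g.
``testpmd -w 0000:03:00.0,mr_ext_memseg_en=0 -- -i``.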