numam-dpdk/lib/librte_mbuf/rte_mbuf_dyn.c
Thomas Monjalon 4630290af4 mbuf: move pool pointer in first half
According to the Technical Board decision
(http://mails.dpdk.org/archives/dev/2020-November/191859.html),
the mempool pointer in the mbuf struct is moved
from the second to the first half.
It may increase performance in some cases
on systems having 64-byte cache line, i.e. mbuf split in two cache lines.

Due to this change, all fields after "pool" are moved up.
Hopefully no vector data path is impacted.

Moving this field gives more space to dynfield1
while dropping the temporary dynfield0.

This is how the mbuf layout looks like (pahole-style):

word  type                              name                byte  size
 0    void *                            buf_addr;         /*   0 +  8 */
 1    rte_iova_t                        buf_iova          /*   8 +  8 */
      /* --- RTE_MARKER64               rearm_data;                   */
 2    uint16_t                          data_off;         /*  16 +  2 */
      uint16_t                          refcnt;           /*  18 +  2 */
      uint16_t                          nb_segs;          /*  20 +  2 */
      uint16_t                          port;             /*  22 +  2 */
 3    uint64_t                          ol_flags;         /*  24 +  8 */
      /* --- RTE_MARKER                 rx_descriptor_fields1;        */
 4    uint32_t             union        packet_type;      /*  32 +  4 */
      uint32_t                          pkt_len;          /*  36 +  4 */
 5    uint16_t                          data_len;         /*  40 +  2 */
      uint16_t                          vlan_tci;         /*  42 +  2 */
 5.5  uint64_t             union        hash;             /*  44 +  8 */
 6.5  uint16_t                          vlan_tci_outer;   /*  52 +  2 */
      uint16_t                          buf_len;          /*  54 +  2 */
 7    struct rte_mempool *              pool;             /*  56 +  8 */
      /* --- RTE_MARKER                 cacheline1;                   */
 8    struct rte_mbuf *                 next;             /*  64 +  8 */
 9    uint64_t             union        tx_offload;       /*  72 +  8 */
10    struct rte_mbuf_ext_shared_info * shinfo;           /*  80 +  8 */
11    uint16_t                          priv_size;        /*  88 +  2 */
      uint16_t                          timesync;         /*  90 +  2 */
11.5  uint32_t                          dynfield1[9];     /*  92 + 36 */
16    /* --- END                                             128      */

Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Olivier Matz <olivier.matz@6wind.com>
Acked-by: Jerin Jacob <jerinj@marvell.com>
Acked-by: Stephen Hemminger <stephen@networkplumber.org>
2020-11-12 16:39:10 +01:00

614 lines
14 KiB
C

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright 2019 6WIND S.A.
*/
#include <sys/queue.h>
#include <stdint.h>
#include <limits.h>
#include <rte_common.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_tailq.h>
#include <rte_errno.h>
#include <rte_malloc.h>
#include <rte_string_fns.h>
#include <rte_bitops.h>
#include <rte_mbuf.h>
#include <rte_mbuf_dyn.h>
#define RTE_MBUF_DYN_MZNAME "rte_mbuf_dyn"
struct mbuf_dynfield_elt {
struct rte_mbuf_dynfield params;
size_t offset;
};
TAILQ_HEAD(mbuf_dynfield_list, rte_tailq_entry);
static struct rte_tailq_elem mbuf_dynfield_tailq = {
.name = "RTE_MBUF_DYNFIELD",
};
EAL_REGISTER_TAILQ(mbuf_dynfield_tailq);
struct mbuf_dynflag_elt {
struct rte_mbuf_dynflag params;
unsigned int bitnum;
};
TAILQ_HEAD(mbuf_dynflag_list, rte_tailq_entry);
static struct rte_tailq_elem mbuf_dynflag_tailq = {
.name = "RTE_MBUF_DYNFLAG",
};
EAL_REGISTER_TAILQ(mbuf_dynflag_tailq);
struct mbuf_dyn_shm {
/**
* For each mbuf byte, free_space[i] != 0 if space is free.
* The value is the size of the biggest aligned element that
* can fit in the zone.
*/
uint8_t free_space[sizeof(struct rte_mbuf)];
/** Bitfield of available flags. */
uint64_t free_flags;
};
static struct mbuf_dyn_shm *shm;
/* Set the value of free_space[] according to the size and alignment of
* the free areas. This helps to select the best place when reserving a
* dynamic field. Assume tailq is locked.
*/
static void
process_score(void)
{
size_t off, align, size, i;
/* first, erase previous info */
for (i = 0; i < sizeof(struct rte_mbuf); i++) {
if (shm->free_space[i])
shm->free_space[i] = 1;
}
off = 0;
while (off < sizeof(struct rte_mbuf)) {
/* get the size of the free zone */
for (size = 0; (off + size) < sizeof(struct rte_mbuf) &&
shm->free_space[off + size]; size++)
;
if (size == 0) {
off++;
continue;
}
/* get the alignment of biggest object that can fit in
* the zone at this offset.
*/
for (align = 1;
(off % (align << 1)) == 0 && (align << 1) <= size;
align <<= 1)
;
/* save it in free_space[] */
for (i = off; i < off + align; i++)
shm->free_space[i] = RTE_MAX(align, shm->free_space[i]);
off += align;
}
}
/* Mark the area occupied by a mbuf field as available in the shm. */
#define mark_free(field) \
memset(&shm->free_space[offsetof(struct rte_mbuf, field)], \
1, sizeof(((struct rte_mbuf *)0)->field))
/* Allocate and initialize the shared memory. Assume tailq is locked */
static int
init_shared_mem(void)
{
const struct rte_memzone *mz;
uint64_t mask;
if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
mz = rte_memzone_reserve_aligned(RTE_MBUF_DYN_MZNAME,
sizeof(struct mbuf_dyn_shm),
SOCKET_ID_ANY, 0,
RTE_CACHE_LINE_SIZE);
} else {
mz = rte_memzone_lookup(RTE_MBUF_DYN_MZNAME);
}
if (mz == NULL)
return -1;
shm = mz->addr;
if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
/* init free_space, keep it sync'd with
* rte_mbuf_dynfield_copy().
*/
memset(shm, 0, sizeof(*shm));
mark_free(dynfield1);
/* init free_flags */
for (mask = PKT_FIRST_FREE; mask <= PKT_LAST_FREE; mask <<= 1)
shm->free_flags |= mask;
process_score();
}
return 0;
}
/* check if this offset can be used */
static int
check_offset(size_t offset, size_t size, size_t align)
{
size_t i;
if ((offset & (align - 1)) != 0)
return -1;
if (offset + size > sizeof(struct rte_mbuf))
return -1;
for (i = 0; i < size; i++) {
if (!shm->free_space[i + offset])
return -1;
}
return 0;
}
/* assume tailq is locked */
static struct mbuf_dynfield_elt *
__mbuf_dynfield_lookup(const char *name)
{
struct mbuf_dynfield_list *mbuf_dynfield_list;
struct mbuf_dynfield_elt *mbuf_dynfield;
struct rte_tailq_entry *te;
mbuf_dynfield_list = RTE_TAILQ_CAST(
mbuf_dynfield_tailq.head, mbuf_dynfield_list);
TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
mbuf_dynfield = (struct mbuf_dynfield_elt *)te->data;
if (strcmp(name, mbuf_dynfield->params.name) == 0)
break;
}
if (te == NULL || mbuf_dynfield == NULL) {
rte_errno = ENOENT;
return NULL;
}
return mbuf_dynfield;
}
int
rte_mbuf_dynfield_lookup(const char *name, struct rte_mbuf_dynfield *params)
{
struct mbuf_dynfield_elt *mbuf_dynfield;
rte_mcfg_tailq_read_lock();
if (shm == NULL && init_shared_mem() < 0)
mbuf_dynfield = NULL;
else
mbuf_dynfield = __mbuf_dynfield_lookup(name);
rte_mcfg_tailq_read_unlock();
if (mbuf_dynfield == NULL)
return -1;
if (params != NULL)
memcpy(params, &mbuf_dynfield->params, sizeof(*params));
return mbuf_dynfield->offset;
}
static int mbuf_dynfield_cmp(const struct rte_mbuf_dynfield *params1,
const struct rte_mbuf_dynfield *params2)
{
if (strcmp(params1->name, params2->name))
return -1;
if (params1->size != params2->size)
return -1;
if (params1->align != params2->align)
return -1;
if (params1->flags != params2->flags)
return -1;
return 0;
}
/* assume tailq is locked */
static int
__rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield *params,
size_t req)
{
struct mbuf_dynfield_list *mbuf_dynfield_list;
struct mbuf_dynfield_elt *mbuf_dynfield = NULL;
struct rte_tailq_entry *te = NULL;
unsigned int best_zone = UINT_MAX;
size_t i, offset;
int ret;
if (shm == NULL && init_shared_mem() < 0)
return -1;
mbuf_dynfield = __mbuf_dynfield_lookup(params->name);
if (mbuf_dynfield != NULL) {
if (req != SIZE_MAX && req != mbuf_dynfield->offset) {
rte_errno = EEXIST;
return -1;
}
if (mbuf_dynfield_cmp(params, &mbuf_dynfield->params) < 0) {
rte_errno = EEXIST;
return -1;
}
return mbuf_dynfield->offset;
}
if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
rte_errno = EPERM;
return -1;
}
if (req == SIZE_MAX) {
/* Find the best place to put this field: we search the
* lowest value of shm->free_space[offset]: the zones
* containing room for larger fields are kept for later.
*/
for (offset = 0;
offset < sizeof(struct rte_mbuf);
offset++) {
if (check_offset(offset, params->size,
params->align) == 0 &&
shm->free_space[offset] < best_zone) {
best_zone = shm->free_space[offset];
req = offset;
}
}
if (req == SIZE_MAX) {
rte_errno = ENOENT;
return -1;
}
} else {
if (check_offset(req, params->size, params->align) < 0) {
rte_errno = EBUSY;
return -1;
}
}
offset = req;
mbuf_dynfield_list = RTE_TAILQ_CAST(
mbuf_dynfield_tailq.head, mbuf_dynfield_list);
te = rte_zmalloc("MBUF_DYNFIELD_TAILQ_ENTRY", sizeof(*te), 0);
if (te == NULL) {
rte_errno = ENOMEM;
return -1;
}
mbuf_dynfield = rte_zmalloc("mbuf_dynfield", sizeof(*mbuf_dynfield), 0);
if (mbuf_dynfield == NULL) {
rte_free(te);
rte_errno = ENOMEM;
return -1;
}
ret = strlcpy(mbuf_dynfield->params.name, params->name,
sizeof(mbuf_dynfield->params.name));
if (ret < 0 || ret >= (int)sizeof(mbuf_dynfield->params.name)) {
rte_errno = ENAMETOOLONG;
rte_free(mbuf_dynfield);
rte_free(te);
return -1;
}
memcpy(&mbuf_dynfield->params, params, sizeof(mbuf_dynfield->params));
mbuf_dynfield->offset = offset;
te->data = mbuf_dynfield;
TAILQ_INSERT_TAIL(mbuf_dynfield_list, te, next);
for (i = offset; i < offset + params->size; i++)
shm->free_space[i] = 0;
process_score();
RTE_LOG(DEBUG, MBUF, "Registered dynamic field %s (sz=%zu, al=%zu, fl=0x%x) -> %zd\n",
params->name, params->size, params->align, params->flags,
offset);
return offset;
}
int
rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield *params,
size_t req)
{
int ret;
if (params->size >= sizeof(struct rte_mbuf)) {
rte_errno = EINVAL;
return -1;
}
if (!rte_is_power_of_2(params->align)) {
rte_errno = EINVAL;
return -1;
}
if (params->flags != 0) {
rte_errno = EINVAL;
return -1;
}
rte_mcfg_tailq_write_lock();
ret = __rte_mbuf_dynfield_register_offset(params, req);
rte_mcfg_tailq_write_unlock();
return ret;
}
int
rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params)
{
return rte_mbuf_dynfield_register_offset(params, SIZE_MAX);
}
/* assume tailq is locked */
static struct mbuf_dynflag_elt *
__mbuf_dynflag_lookup(const char *name)
{
struct mbuf_dynflag_list *mbuf_dynflag_list;
struct mbuf_dynflag_elt *mbuf_dynflag;
struct rte_tailq_entry *te;
mbuf_dynflag_list = RTE_TAILQ_CAST(
mbuf_dynflag_tailq.head, mbuf_dynflag_list);
TAILQ_FOREACH(te, mbuf_dynflag_list, next) {
mbuf_dynflag = (struct mbuf_dynflag_elt *)te->data;
if (strncmp(name, mbuf_dynflag->params.name,
RTE_MBUF_DYN_NAMESIZE) == 0)
break;
}
if (te == NULL) {
rte_errno = ENOENT;
return NULL;
}
return mbuf_dynflag;
}
int
rte_mbuf_dynflag_lookup(const char *name,
struct rte_mbuf_dynflag *params)
{
struct mbuf_dynflag_elt *mbuf_dynflag;
rte_mcfg_tailq_read_lock();
if (shm == NULL && init_shared_mem() < 0)
mbuf_dynflag = NULL;
else
mbuf_dynflag = __mbuf_dynflag_lookup(name);
rte_mcfg_tailq_read_unlock();
if (mbuf_dynflag == NULL)
return -1;
if (params != NULL)
memcpy(params, &mbuf_dynflag->params, sizeof(*params));
return mbuf_dynflag->bitnum;
}
static int mbuf_dynflag_cmp(const struct rte_mbuf_dynflag *params1,
const struct rte_mbuf_dynflag *params2)
{
if (strcmp(params1->name, params2->name))
return -1;
if (params1->flags != params2->flags)
return -1;
return 0;
}
/* assume tailq is locked */
static int
__rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
unsigned int req)
{
struct mbuf_dynflag_list *mbuf_dynflag_list;
struct mbuf_dynflag_elt *mbuf_dynflag = NULL;
struct rte_tailq_entry *te = NULL;
unsigned int bitnum;
int ret;
if (shm == NULL && init_shared_mem() < 0)
return -1;
mbuf_dynflag = __mbuf_dynflag_lookup(params->name);
if (mbuf_dynflag != NULL) {
if (req != UINT_MAX && req != mbuf_dynflag->bitnum) {
rte_errno = EEXIST;
return -1;
}
if (mbuf_dynflag_cmp(params, &mbuf_dynflag->params) < 0) {
rte_errno = EEXIST;
return -1;
}
return mbuf_dynflag->bitnum;
}
if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
rte_errno = EPERM;
return -1;
}
if (req == UINT_MAX) {
if (shm->free_flags == 0) {
rte_errno = ENOENT;
return -1;
}
bitnum = rte_bsf64(shm->free_flags);
} else {
if ((shm->free_flags & (1ULL << req)) == 0) {
rte_errno = EBUSY;
return -1;
}
bitnum = req;
}
mbuf_dynflag_list = RTE_TAILQ_CAST(
mbuf_dynflag_tailq.head, mbuf_dynflag_list);
te = rte_zmalloc("MBUF_DYNFLAG_TAILQ_ENTRY", sizeof(*te), 0);
if (te == NULL) {
rte_errno = ENOMEM;
return -1;
}
mbuf_dynflag = rte_zmalloc("mbuf_dynflag", sizeof(*mbuf_dynflag), 0);
if (mbuf_dynflag == NULL) {
rte_free(te);
rte_errno = ENOMEM;
return -1;
}
ret = strlcpy(mbuf_dynflag->params.name, params->name,
sizeof(mbuf_dynflag->params.name));
if (ret < 0 || ret >= (int)sizeof(mbuf_dynflag->params.name)) {
rte_free(mbuf_dynflag);
rte_free(te);
rte_errno = ENAMETOOLONG;
return -1;
}
mbuf_dynflag->bitnum = bitnum;
te->data = mbuf_dynflag;
TAILQ_INSERT_TAIL(mbuf_dynflag_list, te, next);
shm->free_flags &= ~(1ULL << bitnum);
RTE_LOG(DEBUG, MBUF, "Registered dynamic flag %s (fl=0x%x) -> %u\n",
params->name, params->flags, bitnum);
return bitnum;
}
int
rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
unsigned int req)
{
int ret;
if (req >= RTE_SIZEOF_FIELD(struct rte_mbuf, ol_flags) * CHAR_BIT &&
req != UINT_MAX) {
rte_errno = EINVAL;
return -1;
}
rte_mcfg_tailq_write_lock();
ret = __rte_mbuf_dynflag_register_bitnum(params, req);
rte_mcfg_tailq_write_unlock();
return ret;
}
int
rte_mbuf_dynflag_register(const struct rte_mbuf_dynflag *params)
{
return rte_mbuf_dynflag_register_bitnum(params, UINT_MAX);
}
void rte_mbuf_dyn_dump(FILE *out)
{
struct mbuf_dynfield_list *mbuf_dynfield_list;
struct mbuf_dynfield_elt *dynfield;
struct mbuf_dynflag_list *mbuf_dynflag_list;
struct mbuf_dynflag_elt *dynflag;
struct rte_tailq_entry *te;
size_t i;
rte_mcfg_tailq_write_lock();
init_shared_mem();
fprintf(out, "Reserved fields:\n");
mbuf_dynfield_list = RTE_TAILQ_CAST(
mbuf_dynfield_tailq.head, mbuf_dynfield_list);
TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
dynfield = (struct mbuf_dynfield_elt *)te->data;
fprintf(out, " name=%s offset=%zd size=%zd align=%zd flags=%x\n",
dynfield->params.name, dynfield->offset,
dynfield->params.size, dynfield->params.align,
dynfield->params.flags);
}
fprintf(out, "Reserved flags:\n");
mbuf_dynflag_list = RTE_TAILQ_CAST(
mbuf_dynflag_tailq.head, mbuf_dynflag_list);
TAILQ_FOREACH(te, mbuf_dynflag_list, next) {
dynflag = (struct mbuf_dynflag_elt *)te->data;
fprintf(out, " name=%s bitnum=%u flags=%x\n",
dynflag->params.name, dynflag->bitnum,
dynflag->params.flags);
}
fprintf(out, "Free space in mbuf (0 = occupied, value = free zone alignment):\n");
for (i = 0; i < sizeof(struct rte_mbuf); i++) {
if ((i % 8) == 0)
fprintf(out, " %4.4zx: ", i);
fprintf(out, "%2.2x%s", shm->free_space[i],
(i % 8 != 7) ? " " : "\n");
}
fprintf(out, "Free bit in mbuf->ol_flags (0 = occupied, 1 = free):\n");
for (i = 0; i < sizeof(uint64_t) * CHAR_BIT; i++) {
if ((i % 8) == 0)
fprintf(out, " %4.4zx: ", i);
fprintf(out, "%1.1x%s", (shm->free_flags & (1ULL << i)) ? 1 : 0,
(i % 8 != 7) ? " " : "\n");
}
rte_mcfg_tailq_write_unlock();
}
static int
rte_mbuf_dyn_timestamp_register(int *field_offset, uint64_t *flag,
const char *direction, const char *flag_name)
{
static const struct rte_mbuf_dynfield field_desc = {
.name = RTE_MBUF_DYNFIELD_TIMESTAMP_NAME,
.size = sizeof(rte_mbuf_timestamp_t),
.align = __alignof__(rte_mbuf_timestamp_t),
};
struct rte_mbuf_dynflag flag_desc = {};
int offset;
offset = rte_mbuf_dynfield_register(&field_desc);
if (offset < 0) {
RTE_LOG(ERR, MBUF,
"Failed to register mbuf field for timestamp\n");
return -1;
}
if (field_offset != NULL)
*field_offset = offset;
strlcpy(flag_desc.name, flag_name, sizeof(flag_desc.name));
offset = rte_mbuf_dynflag_register(&flag_desc);
if (offset < 0) {
RTE_LOG(ERR, MBUF,
"Failed to register mbuf flag for %s timestamp\n",
direction);
return -1;
}
if (flag != NULL)
*flag = RTE_BIT64(offset);
return 0;
}
int
rte_mbuf_dyn_rx_timestamp_register(int *field_offset, uint64_t *rx_flag)
{
return rte_mbuf_dyn_timestamp_register(field_offset, rx_flag,
"Rx", RTE_MBUF_DYNFLAG_RX_TIMESTAMP_NAME);
}
int
rte_mbuf_dyn_tx_timestamp_register(int *field_offset, uint64_t *tx_flag)
{
return rte_mbuf_dyn_timestamp_register(field_offset, tx_flag,
"Tx", RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME);
}