numam-dpdk/lib/pdump/rte_pdump.c
Konstantin Ananyev 53caecb844 pdump: fix freeing statistics memzone
rte_pdump_init() always allocates a new memzone for pdump_stats,
but rte_pdump_uninit() never frees it.
So the following combination will always fail:
rte_pdump_init(); rte_pdump_uninit(); rte_pdump_init();
The issue was caught by pdump_autotest UT.
While the first test run is successful, any consecutive runs
of this test case will fail.
Fix the issue by calling rte_memzone_free() for statistics memzone.

Fixes: 10f726efe26c ("pdump: support pcapng and filtering")

Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
Acked-by: Reshma Pattan <reshma.pattan@intel.com>
Acked-by: Stephen Hemminger <stephen@networkplumber.org>
2021-11-03 12:53:03 +01:00

764 lines
18 KiB
C

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2016-2018 Intel Corporation
*/
#include <rte_memcpy.h>
#include <rte_mbuf.h>
#include <rte_ethdev.h>
#include <rte_lcore.h>
#include <rte_log.h>
#include <rte_memzone.h>
#include <rte_errno.h>
#include <rte_string_fns.h>
#include <rte_pcapng.h>
#include "rte_pdump.h"
/* Register the log type used by this file; NOTICE is passed as its level. */
RTE_LOG_REGISTER_DEFAULT(pdump_logtype, NOTICE);
/*
 * Macro for printing using rte_log().
 * "level" is the suffix of an RTE_LOG_* constant (e.g. ERR). Every message
 * is prefixed with the name of the calling function; callers in this file
 * put the trailing newline in fmt themselves.
 */
#define PDUMP_LOG(level, fmt, args...) \
rte_log(RTE_LOG_ ## level, pdump_logtype, "%s(): " fmt, \
__func__, ## args)
/*
 * Name of the multi-process action used for both the pdump requests and
 * the replies to them.
 */
#define PDUMP_MP "mp_pdump"
/*
 * Operation carried in pdump_request.op.
 * ENABLE installs the capture callbacks on the selected queues,
 * DISABLE removes them again.
 */
enum pdump_operation {
DISABLE = 1,
ENABLE = 2
};
/*
 * Internal version number in request.
 * The client selects V2 when RTE_PDUMP_FLAG_PCAPNG is set and V1 otherwise.
 * On the capture path V2 copies packets with rte_pcapng_copy() while V1
 * uses a plain rte_pktmbuf_copy(). Any other value is rejected by the
 * server as a version mismatch.
 */
enum pdump_version {
V1 = 1, /* original format: plain mbuf copy */
V2 = 2, /* copy wrapped by rte_pcapng_copy() */
};
/*
 * Request built by the client and sent as the parameter block of a
 * PDUMP_MP multi-process message. The server rejects any message whose
 * parameter length differs from sizeof(struct pdump_request).
 */
struct pdump_request {
uint16_t ver; /* one of enum pdump_version */
uint16_t op; /* one of enum pdump_operation */
uint32_t flags; /* RTE_PDUMP_FLAG_RX and/or RTE_PDUMP_FLAG_TX */
char device[RTE_DEV_NAME_MAX_LEN]; /* device name, resolved to a port id by the server */
uint16_t queue; /* queue id, or RTE_PDUMP_ALL_QUEUES */
/* The remaining fields are only filled in by the client for ENABLE. */
struct rte_ring *ring; /* ring the copied packets are enqueued on */
struct rte_mempool *mp; /* mempool the copies are allocated from */
const struct rte_bpf_prm *prm; /* BPF program parameters, NULL for no filter */
uint32_t snaplen; /* snap length handed to the packet copy routines */
};
/*
 * Reply sent by the server as the parameter block of a PDUMP_MP message.
 */
struct pdump_response {
uint16_t ver; /* copied from pdump_request.ver */
uint16_t res_op; /* copied from pdump_request.op */
int32_t err_value; /* result of applying the request; the client treats 0 as success */
};
/*
 * Capture state for one queue of one port; rx_cbs and tx_cbs hold one
 * entry per (port, queue) for each direction. An entry is considered in
 * use while its cb pointer is non-NULL, and the entry itself is passed to
 * the ethdev callback as its user parameter.
 */
static struct pdump_rxtx_cbs {
struct rte_ring *ring; /* ring the copied packets are enqueued on */
struct rte_mempool *mp; /* mempool the copies are allocated from */
const struct rte_eth_rxtx_callback *cb; /* installed callback, NULL when none */
const struct rte_bpf *filter; /* loaded BPF filter, NULL to capture every packet */
enum pdump_version ver; /* V2 selects rte_pcapng_copy(), otherwise rte_pktmbuf_copy() */
uint32_t snaplen; /* snap length handed to the packet copy routines */
} rx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT],
tx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
/*
 * The packet capture statistics keep track of packets
 * accepted, filtered and dropped. They are kept per port and per queue
 * for each direction, and are stored in a named memzone so that a
 * secondary process can look them up by name.
 */
/* Name of the memzone that holds the statistics. */
static const char MZ_RTE_PDUMP_STATS[] = "rte_pdump_stats";
/*
 * Points into the statistics memzone once rte_pdump_init() has reserved it
 * (or rte_pdump_stats() has looked it up); NULL otherwise.
 */
static struct {
struct rte_pdump_stats rx[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
struct rte_pdump_stats tx[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
const struct rte_memzone *mz; /* memzone backing this structure, kept so it can be freed */
} *pdump_stats;
/*
 * Create a clone of each mbuf in a burst and place the clones into the
 * capture ring.
 *
 * port_id, queue  identify where the burst was seen (recorded by the
 *                 pcapng copy)
 * direction       RX or TX, recorded by the pcapng copy
 * pkts, nb_pkts   the burst; the original packets are never modified
 * cbs             capture settings for this queue (ring, mempool, filter,
 *                 version and snap length)
 * stats           counters for this queue, updated with relaxed atomics:
 *                 filtered - packets rejected by the BPF filter
 *                 nombuf   - packets whose copy could not be allocated
 *                 accepted - copies that were made
 *                 ringfull - copies freed because the ring had no room
 */
static void
pdump_copy(uint16_t port_id, uint16_t queue,
	enum rte_pcapng_direction direction,
	struct rte_mbuf **pkts, uint16_t nb_pkts,
	const struct pdump_rxtx_cbs *cbs,
	struct rte_pdump_stats *stats)
{
	unsigned int i;
	int ring_enq;
	uint16_t d_pkts = 0;
	uint64_t ts;
	struct rte_ring *ring;
	struct rte_mempool *mp;
	struct rte_mbuf *p;

	/*
	 * Nothing to do for an empty burst. Returning here also guarantees
	 * that the variable length arrays below are never declared with zero
	 * elements, which is undefined behaviour in standard C, and avoids
	 * touching the shared counters and the ring on idle polls.
	 */
	if (nb_pkts == 0)
		return;

	struct rte_mbuf *dup_bufs[nb_pkts];
	uint64_t rcs[nb_pkts];

	/* Run the filter once over the whole burst; rcs[] is only read if set. */
	if (cbs->filter)
		rte_bpf_exec_burst(cbs->filter, (void **)pkts, rcs, nb_pkts);

	/* One timestamp is shared by every packet of the burst. */
	ts = rte_get_tsc_cycles();
	ring = cbs->ring;
	mp = cbs->mp;

	for (i = 0; i < nb_pkts; i++) {
		/*
		 * This uses same BPF return value convention as socket filter
		 * and pcap_offline_filter.
		 * if program returns zero
		 * then packet doesn't match the filter (will be ignored).
		 */
		if (cbs->filter && rcs[i] == 0) {
			__atomic_fetch_add(&stats->filtered,
					   1, __ATOMIC_RELAXED);
			continue;
		}

		/*
		 * If using pcapng then want to wrap packets
		 * otherwise a simple copy.
		 */
		if (cbs->ver == V2)
			p = rte_pcapng_copy(port_id, queue,
					    pkts[i], mp, cbs->snaplen,
					    ts, direction);
		else
			p = rte_pktmbuf_copy(pkts[i], mp, 0, cbs->snaplen);

		if (unlikely(p == NULL))
			__atomic_fetch_add(&stats->nombuf, 1, __ATOMIC_RELAXED);
		else
			dup_bufs[d_pkts++] = p;
	}

	__atomic_fetch_add(&stats->accepted, d_pkts, __ATOMIC_RELAXED);

	ring_enq = rte_ring_enqueue_burst(ring, (void *)dup_bufs, d_pkts, NULL);
	if (unlikely(ring_enq < d_pkts)) {
		/* The ring took only part of the burst: free what is left. */
		unsigned int drops = d_pkts - ring_enq;

		__atomic_fetch_add(&stats->ringfull, drops, __ATOMIC_RELAXED);
		rte_pktmbuf_free_bulk(&dup_bufs[ring_enq], drops);
	}
}
/*
 * RX callback installed on a captured queue.
 * user_params is the rx_cbs entry registered together with the callback.
 * The burst is copied towards the capture ring and then passed on
 * unchanged, so the returned packet count is always nb_pkts.
 */
static uint16_t
pdump_rx(uint16_t port, uint16_t queue,
	struct rte_mbuf **pkts, uint16_t nb_pkts,
	uint16_t max_pkts __rte_unused, void *user_params)
{
	struct rte_pdump_stats *rx_stats = &pdump_stats->rx[port][queue];
	const struct pdump_rxtx_cbs *state = user_params;

	pdump_copy(port, queue, RTE_PCAPNG_DIRECTION_IN,
		   pkts, nb_pkts, state, rx_stats);

	return nb_pkts;
}
/*
 * TX callback installed on a captured queue.
 * user_params is the tx_cbs entry registered together with the callback.
 * The burst is copied towards the capture ring and then passed on
 * unchanged, so the returned packet count is always nb_pkts.
 */
static uint16_t
pdump_tx(uint16_t port, uint16_t queue,
	struct rte_mbuf **pkts, uint16_t nb_pkts, void *user_params)
{
	struct rte_pdump_stats *tx_stats = &pdump_stats->tx[port][queue];
	const struct pdump_rxtx_cbs *state = user_params;

	pdump_copy(port, queue, RTE_PCAPNG_DIRECTION_OUT,
		   pkts, nb_pkts, state, tx_stats);

	return nb_pkts;
}
/*
 * Install (ENABLE) or remove (DISABLE) the RX capture callback on a range
 * of queues of one port.
 *
 * ver, ring, mp, filter, snaplen  capture settings stored in each queue's
 *                                 rx_cbs entry when enabling
 * end_q    one past the last queue to process
 * port     port id
 * queue    first queue to process, or RTE_PDUMP_ALL_QUEUES to start at 0
 * operation  ENABLE or DISABLE; any other value leaves the queues untouched
 *
 * Returns 0 on success or a negative errno value. Processing stops at the
 * first queue that fails; queues handled before it keep their new state.
 */
static int
pdump_register_rx_callbacks(enum pdump_version ver,
		uint16_t end_q, uint16_t port, uint16_t queue,
		struct rte_ring *ring, struct rte_mempool *mp,
		struct rte_bpf *filter,
		uint16_t operation, uint32_t snaplen)
{
	uint16_t qid;

	qid = (queue == RTE_PDUMP_ALL_QUEUES) ? 0 : queue;
	for (; qid < end_q; qid++) {
		struct pdump_rxtx_cbs *cbs = &rx_cbs[port][qid];

		if (operation == ENABLE) {
			if (cbs->cb) {
				PDUMP_LOG(ERR,
					"rx callback for port=%d queue=%d, already exists\n",
					port, qid);
				return -EEXIST;
			}
			/* Fill in the entry first: the callback reads it. */
			cbs->ver = ver;
			cbs->ring = ring;
			cbs->mp = mp;
			cbs->snaplen = snaplen;
			cbs->filter = filter;

			cbs->cb = rte_eth_add_first_rx_callback(port, qid,
								pdump_rx, cbs);
			if (cbs->cb == NULL) {
				/* Capture the error before logging can disturb it. */
				int err = rte_errno;

				PDUMP_LOG(ERR,
					"failed to add rx callback, errno=%d\n",
					err);
				/*
				 * rte_errno is positive; negate it so that it
				 * is reported like every other failure here and
				 * is caught by callers testing for ret < 0.
				 */
				return -err;
			}
		} else if (operation == DISABLE) {
			int ret;

			if (cbs->cb == NULL) {
				PDUMP_LOG(ERR,
					"no existing rx callback for port=%d queue=%d\n",
					port, qid);
				return -EINVAL;
			}
			ret = rte_eth_remove_rx_callback(port, qid, cbs->cb);
			if (ret < 0) {
				PDUMP_LOG(ERR,
					"failed to remove rx callback, errno=%d\n",
					-ret);
				return ret;
			}
			/* Mark the entry as free again. */
			cbs->cb = NULL;
		}
	}

	return 0;
}
/*
 * Install (ENABLE) or remove (DISABLE) the TX capture callback on a range
 * of queues of one port.
 *
 * ver, ring, mp, filter, snaplen  capture settings stored in each queue's
 *                                 tx_cbs entry when enabling
 * end_q    one past the last queue to process
 * port     port id
 * queue    first queue to process, or RTE_PDUMP_ALL_QUEUES to start at 0
 * operation  ENABLE or DISABLE; any other value leaves the queues untouched
 *
 * Returns 0 on success or a negative errno value. Processing stops at the
 * first queue that fails; queues handled before it keep their new state.
 */
static int
pdump_register_tx_callbacks(enum pdump_version ver,
		uint16_t end_q, uint16_t port, uint16_t queue,
		struct rte_ring *ring, struct rte_mempool *mp,
		struct rte_bpf *filter,
		uint16_t operation, uint32_t snaplen)
{
	uint16_t qid;

	qid = (queue == RTE_PDUMP_ALL_QUEUES) ? 0 : queue;
	for (; qid < end_q; qid++) {
		struct pdump_rxtx_cbs *cbs = &tx_cbs[port][qid];

		if (operation == ENABLE) {
			if (cbs->cb) {
				PDUMP_LOG(ERR,
					"tx callback for port=%d queue=%d, already exists\n",
					port, qid);
				return -EEXIST;
			}
			/* Fill in the entry first: the callback reads it. */
			cbs->ver = ver;
			cbs->ring = ring;
			cbs->mp = mp;
			cbs->snaplen = snaplen;
			cbs->filter = filter;

			cbs->cb = rte_eth_add_tx_callback(port, qid, pdump_tx,
							  cbs);
			if (cbs->cb == NULL) {
				/* Capture the error before logging can disturb it. */
				int err = rte_errno;

				PDUMP_LOG(ERR,
					"failed to add tx callback, errno=%d\n",
					err);
				/*
				 * rte_errno is positive; negate it so that it
				 * is reported like every other failure here and
				 * is caught by callers testing for ret < 0.
				 */
				return -err;
			}
		} else if (operation == DISABLE) {
			int ret;

			if (cbs->cb == NULL) {
				PDUMP_LOG(ERR,
					"no existing tx callback for port=%d queue=%d\n",
					port, qid);
				return -EINVAL;
			}
			ret = rte_eth_remove_tx_callback(port, qid, cbs->cb);
			if (ret < 0) {
				PDUMP_LOG(ERR,
					"failed to remove tx callback, errno=%d\n",
					-ret);
				return ret;
			}
			/* Mark the entry as free again. */
			cbs->cb = NULL;
		}
	}

	return 0;
}
static int
set_pdump_rxtx_cbs(const struct pdump_request *p)
{
uint16_t nb_rx_q = 0, nb_tx_q = 0, end_q, queue;
uint16_t port;
int ret = 0;
struct rte_bpf *filter = NULL;
uint32_t flags;
uint16_t operation;
struct rte_ring *ring;
struct rte_mempool *mp;
/* Check for possible DPDK version mismatch */
if (!(p->ver == V1 || p->ver == V2)) {
PDUMP_LOG(ERR,
"incorrect client version %u\n", p->ver);
return -EINVAL;
}
if (p->prm) {
if (p->prm->prog_arg.type != RTE_BPF_ARG_PTR_MBUF) {
PDUMP_LOG(ERR,
"invalid BPF program type: %u\n",
p->prm->prog_arg.type);
return -EINVAL;
}
filter = rte_bpf_load(p->prm);
if (filter == NULL) {
PDUMP_LOG(ERR, "cannot load BPF filter: %s\n",
rte_strerror(rte_errno));
return -rte_errno;
}
}
flags = p->flags;
operation = p->op;
queue = p->queue;
ring = p->ring;
mp = p->mp;
ret = rte_eth_dev_get_port_by_name(p->device, &port);
if (ret < 0) {
PDUMP_LOG(ERR,
"failed to get port id for device id=%s\n",
p->device);
return -EINVAL;
}
/* validation if packet capture is for all queues */
if (queue == RTE_PDUMP_ALL_QUEUES) {
struct rte_eth_dev_info dev_info;
ret = rte_eth_dev_info_get(port, &dev_info);
if (ret != 0) {
PDUMP_LOG(ERR,
"Error during getting device (port %u) info: %s\n",
port, strerror(-ret));
return ret;
}
nb_rx_q = dev_info.nb_rx_queues;
nb_tx_q = dev_info.nb_tx_queues;
if (nb_rx_q == 0 && flags & RTE_PDUMP_FLAG_RX) {
PDUMP_LOG(ERR,
"number of rx queues cannot be 0\n");
return -EINVAL;
}
if (nb_tx_q == 0 && flags & RTE_PDUMP_FLAG_TX) {
PDUMP_LOG(ERR,
"number of tx queues cannot be 0\n");
return -EINVAL;
}
if ((nb_tx_q == 0 || nb_rx_q == 0) &&
flags == RTE_PDUMP_FLAG_RXTX) {
PDUMP_LOG(ERR,
"both tx&rx queues must be non zero\n");
return -EINVAL;
}
}
/* register RX callback */
if (flags & RTE_PDUMP_FLAG_RX) {
end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? nb_rx_q : queue + 1;
ret = pdump_register_rx_callbacks(p->ver, end_q, port, queue,
ring, mp, filter,
operation, p->snaplen);
if (ret < 0)
return ret;
}
/* register TX callback */
if (flags & RTE_PDUMP_FLAG_TX) {
end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? nb_tx_q : queue + 1;
ret = pdump_register_tx_callbacks(p->ver, end_q, port, queue,
ring, mp, filter,
operation, p->snaplen);
if (ret < 0)
return ret;
}
return ret;
}
/*
 * Handler for PDUMP_MP multi-process requests.
 *
 * A request of the expected size is applied with set_pdump_rxtx_cbs() and
 * its version and operation are echoed in the reply; a request of any
 * other size is answered with -EINVAL. The reply is always sent back to
 * "peer".
 *
 * Returns 0 if the reply was sent, -1 otherwise.
 */
static int
pdump_server(const struct rte_mp_msg *mp_msg, const void *peer)
{
	struct rte_mp_msg mp_resp;
	const struct pdump_request *cli_req;
	struct pdump_response *resp = (struct pdump_response *)&mp_resp.param;

	/*
	 * Start from an all-zero reply: on a malformed request ver and
	 * res_op are never assigned, and no uninitialised stack bytes
	 * should be handed to the peer.
	 */
	memset(&mp_resp, 0, sizeof(mp_resp));

	/* recv client requests */
	if (mp_msg->len_param != sizeof(*cli_req)) {
		PDUMP_LOG(ERR, "failed to recv from client\n");
		resp->err_value = -EINVAL;
	} else {
		cli_req = (const struct pdump_request *)mp_msg->param;
		resp->ver = cli_req->ver;
		resp->res_op = cli_req->op;
		resp->err_value = set_pdump_rxtx_cbs(cli_req);
	}

	rte_strscpy(mp_resp.name, PDUMP_MP, RTE_MP_MAX_NAME_LEN);
	mp_resp.len_param = sizeof(*resp);
	mp_resp.num_fds = 0;
	if (rte_mp_reply(&mp_resp, peer) < 0) {
		PDUMP_LOG(ERR, "failed to send to client:%s\n",
			  strerror(rte_errno));
		return -1;
	}

	return 0;
}
/*
 * Initialise packet capture handling: reserve the statistics memzone and
 * register pdump_server() as the handler for PDUMP_MP requests.
 *
 * Returns 0 on success, -1 on failure with rte_errno set. A registration
 * failure with rte_errno == ENOTSUP is not treated as an error.
 */
int
rte_pdump_init(void)
{
	const struct rte_memzone *mz;
	int ret;

	mz = rte_memzone_reserve(MZ_RTE_PDUMP_STATS, sizeof(*pdump_stats),
				 rte_socket_id(), 0);
	if (mz == NULL) {
		PDUMP_LOG(ERR, "cannot allocate pdump statistics\n");
		rte_errno = ENOMEM;
		return -1;
	}
	pdump_stats = mz->addr;
	pdump_stats->mz = mz;

	ret = rte_mp_action_register(PDUMP_MP, pdump_server);
	if (ret && rte_errno != ENOTSUP) {
		/*
		 * Undo the reservation. Otherwise the memzone would stay
		 * allocated under its fixed name and every later call to
		 * rte_pdump_init() would fail when reserving it again.
		 * Keep the registration error for the caller.
		 */
		int saved_errno = rte_errno;

		pdump_stats = NULL;
		rte_memzone_free(mz);
		rte_errno = saved_errno;
		return -1;
	}

	return 0;
}
/*
 * Tear down packet capture handling: unregister the PDUMP_MP handler and
 * release the statistics memzone if one is currently referenced.
 * Always returns 0.
 */
int
rte_pdump_uninit(void)
{
	rte_mp_action_unregister(PDUMP_MP);

	/* Nothing more to release when no statistics area is attached. */
	if (pdump_stats == NULL)
		return 0;

	rte_memzone_free(pdump_stats->mz);
	pdump_stats = NULL;

	return 0;
}
/*
 * Check that the ring and mempool supplied by the application can be used
 * for capture: both must be non-NULL and neither may be created with a
 * single-producer or single-consumer restriction.
 *
 * Returns 0 when usable, otherwise -1 with rte_errno set to EINVAL.
 */
static int
pdump_validate_ring_mp(struct rte_ring *ring, struct rte_mempool *mp)
{
	if (ring == NULL || mp == NULL) {
		PDUMP_LOG(ERR, "NULL ring or mempool\n");
		rte_errno = EINVAL;
		return -1;
	}
	if (mp->flags & RTE_MEMPOOL_F_SP_PUT ||
	    mp->flags & RTE_MEMPOOL_F_SC_GET) {
		/* Note the space after the comma: the literals are joined. */
		PDUMP_LOG(ERR,
			  "mempool with SP or SC set not valid for pdump, "
			  "must have MP and MC set\n");
		rte_errno = EINVAL;
		return -1;
	}
	if (rte_ring_is_prod_single(ring) || rte_ring_is_cons_single(ring)) {
		PDUMP_LOG(ERR,
			  "ring with SP or SC set is not valid for pdump, "
			  "must have MP and MC set\n");
		rte_errno = EINVAL;
		return -1;
	}

	return 0;
}
/*
 * Check the capture flags passed by the application.
 * At least one direction bit (RX or TX) must be set, and no bit outside
 * of the direction bits and RTE_PDUMP_FLAG_PCAPNG may be set.
 *
 * Returns 0 when valid. Otherwise returns -1 with rte_errno set to EINVAL
 * (no direction) or ENOTSUP (unknown bits).
 */
static int
pdump_validate_flags(uint32_t flags)
{
	/* every flag bit this implementation understands */
	const uint32_t known = RTE_PDUMP_FLAG_RXTX | RTE_PDUMP_FLAG_PCAPNG;

	if (!(flags & RTE_PDUMP_FLAG_RXTX)) {
		PDUMP_LOG(ERR,
			  "invalid flags, should be either rx/tx/rxtx\n");
		rte_errno = EINVAL;
		return -1;
	}

	if ((flags & ~known) != 0) {
		PDUMP_LOG(ERR,
			  "unknown flags: %#x\n", flags);
		rte_errno = ENOTSUP;
		return -1;
	}

	return 0;
}
/*
 * Check a port id and translate it into its device name.
 * "name" receives the result of rte_eth_dev_get_name_by_port().
 *
 * Returns 0 on success, otherwise -1 with rte_errno set to EINVAL.
 */
static int
pdump_validate_port(uint16_t port, char *name)
{
	if (port >= RTE_MAX_ETHPORTS) {
		PDUMP_LOG(ERR, "Invalid port id %u\n", port);
		rte_errno = EINVAL;
		return -1;
	}

	if (rte_eth_dev_get_name_by_port(port, name) < 0) {
		PDUMP_LOG(ERR, "port %u to name mapping failed\n",
			  port);
		rte_errno = EINVAL;
		return -1;
	}

	return 0;
}
/*
 * Build a pdump request, send it as a synchronous PDUMP_MP multi-process
 * request (5 second timeout) and evaluate the first reply.
 *
 * device     device name to capture on; must not be NULL
 * queue      queue id or RTE_PDUMP_ALL_QUEUES
 * flags      capture flags; only the RX/TX bits are sent, the PCAPNG bit
 *            selects the request version
 * snaplen, ring, mp, prm  only placed in the request for ENABLE
 * operation  ENABLE or DISABLE
 *
 * Returns 0 if the server reported success, -1 otherwise. When a reply
 * was received rte_errno holds the reported error as a positive errno
 * value (0 on success).
 */
static int
pdump_prepare_client_request(const char *device, uint16_t queue,
		uint32_t flags, uint32_t snaplen,
		uint16_t operation,
		struct rte_ring *ring,
		struct rte_mempool *mp,
		const struct rte_bpf_prm *prm)
{
	int ret = -1;
	struct rte_mp_msg mp_req, *mp_rep;
	struct rte_mp_reply mp_reply;
	struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
	struct pdump_request *req = (struct pdump_request *)mp_req.param;
	struct pdump_response *resp;

	if (device == NULL) {
		PDUMP_LOG(ERR, "NULL device name\n");
		rte_errno = EINVAL;
		return -1;
	}

	/*
	 * Zero the whole message, not just the request inside it, so that
	 * no uninitialised stack bytes are sent to the other process.
	 */
	memset(&mp_req, 0, sizeof(mp_req));

	req->ver = (flags & RTE_PDUMP_FLAG_PCAPNG) ? V2 : V1;
	req->flags = flags & RTE_PDUMP_FLAG_RXTX;
	req->op = operation;
	req->queue = queue;
	rte_strscpy(req->device, device, sizeof(req->device));

	if ((operation & ENABLE) != 0) {
		req->ring = ring;
		req->mp = mp;
		req->prm = prm;
		req->snaplen = snaplen;
	}

	rte_strscpy(mp_req.name, PDUMP_MP, RTE_MP_MAX_NAME_LEN);
	mp_req.len_param = sizeof(*req);
	mp_req.num_fds = 0;
	if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0) {
		if (mp_reply.nb_received > 0 && mp_reply.msgs != NULL) {
			int32_t err;

			mp_rep = &mp_reply.msgs[0];
			resp = (struct pdump_response *)mp_rep->param;
			err = resp->err_value;
			/*
			 * The server reports failures as negative errno
			 * values; rte_errno is expected to be positive.
			 */
			rte_errno = (err < 0) ? -err : err;
			if (err == 0)
				ret = 0;
		} else {
			/* The call succeeded but nobody answered. */
			rte_errno = EIO;
		}
		/* The reply array is owned by the caller; free(NULL) is fine. */
		free(mp_reply.msgs);
	}

	if (ret < 0)
		PDUMP_LOG(ERR,
			  "client request for pdump enable/disable failed\n");
	return ret;
}
/*
 * Common implementation behind rte_pdump_enable() and
 * rte_pdump_enable_bpf().
 *
 * Two public entry points exist because the original API reserved a
 * "filter" argument but never looked at it, so applications may pass
 * arbitrary values there; only the newer entry point forwards a filter.
 *
 * Validates the port, the ring/mempool pair and the flags (in that order),
 * maps a snap length of 0 to UINT32_MAX and sends an ENABLE request for
 * the port's device name.
 */
static int
pdump_enable(uint16_t port, uint16_t queue,
	     uint32_t flags, uint32_t snaplen,
	     struct rte_ring *ring, struct rte_mempool *mp,
	     const struct rte_bpf_prm *prm)
{
	char name[RTE_DEV_NAME_MAX_LEN];
	int rc;

	/* Run the checks in order and stop at the first one that fails. */
	rc = pdump_validate_port(port, name);
	if (rc >= 0)
		rc = pdump_validate_ring_mp(ring, mp);
	if (rc >= 0)
		rc = pdump_validate_flags(flags);
	if (rc < 0)
		return rc;

	return pdump_prepare_client_request(name, queue, flags,
					    snaplen == 0 ? UINT32_MAX : snaplen,
					    ENABLE, ring, mp, prm);
}
/*
 * Enable capture on a port, selected by port id, without a BPF filter and
 * without a snap length limit. The "filter" argument is ignored.
 */
int
rte_pdump_enable(uint16_t port, uint16_t queue, uint32_t flags,
		 struct rte_ring *ring,
		 struct rte_mempool *mp,
		 void *filter __rte_unused)
{
	/* 0 is turned into "no limit" by pdump_enable() */
	const uint32_t snaplen = 0;

	return pdump_enable(port, queue, flags, snaplen, ring, mp, NULL);
}
/*
 * Enable capture on a port, selected by port id, with an optional BPF
 * filter and snap length. All arguments are forwarded unchanged to
 * pdump_enable().
 */
int
rte_pdump_enable_bpf(uint16_t port, uint16_t queue,
		     uint32_t flags, uint32_t snaplen,
		     struct rte_ring *ring,
		     struct rte_mempool *mp,
		     const struct rte_bpf_prm *prm)
{
	int rc;

	rc = pdump_enable(port, queue, flags, snaplen, ring, mp, prm);
	return rc;
}
/*
 * Common implementation behind the *_by_deviceid() enable functions.
 *
 * Validates the ring/mempool pair and the flags (in that order), maps a
 * snap length of 0 to UINT32_MAX and sends an ENABLE request for the given
 * device name.
 */
static int
pdump_enable_by_deviceid(const char *device_id, uint16_t queue,
			 uint32_t flags, uint32_t snaplen,
			 struct rte_ring *ring,
			 struct rte_mempool *mp,
			 const struct rte_bpf_prm *prm)
{
	int rc;

	/* Run the checks in order and stop at the first one that fails. */
	rc = pdump_validate_ring_mp(ring, mp);
	if (rc >= 0)
		rc = pdump_validate_flags(flags);
	if (rc < 0)
		return rc;

	return pdump_prepare_client_request(device_id, queue, flags,
					    snaplen == 0 ? UINT32_MAX : snaplen,
					    ENABLE, ring, mp, prm);
}
/*
 * Enable capture on a device, selected by name, without a BPF filter and
 * without a snap length limit. The "filter" argument is ignored.
 */
int
rte_pdump_enable_by_deviceid(char *device_id, uint16_t queue,
			     uint32_t flags,
			     struct rte_ring *ring,
			     struct rte_mempool *mp,
			     void *filter __rte_unused)
{
	/* 0 is turned into "no limit" by pdump_enable_by_deviceid() */
	const uint32_t snaplen = 0;

	return pdump_enable_by_deviceid(device_id, queue, flags, snaplen,
					ring, mp, NULL);
}
/*
 * Enable capture on a device, selected by name, with an optional BPF
 * filter and snap length. All arguments are forwarded unchanged to
 * pdump_enable_by_deviceid().
 */
int
rte_pdump_enable_bpf_by_deviceid(const char *device_id, uint16_t queue,
				 uint32_t flags, uint32_t snaplen,
				 struct rte_ring *ring,
				 struct rte_mempool *mp,
				 const struct rte_bpf_prm *prm)
{
	int rc;

	rc = pdump_enable_by_deviceid(device_id, queue, flags, snaplen,
				      ring, mp, prm);
	return rc;
}
/*
 * Disable capture on a port selected by port id.
 * Validates the port and the flags, then sends a DISABLE request for the
 * port's device name. Returns the result of the first step that fails, or
 * of the request itself.
 */
int
rte_pdump_disable(uint16_t port, uint16_t queue, uint32_t flags)
{
	char name[RTE_DEV_NAME_MAX_LEN];
	int rc;

	rc = pdump_validate_port(port, name);
	if (rc < 0)
		return rc;

	rc = pdump_validate_flags(flags);
	if (rc < 0)
		return rc;

	/* A DISABLE request carries no ring, mempool, filter or snap length. */
	return pdump_prepare_client_request(name, queue, flags, 0,
					    DISABLE, NULL, NULL, NULL);
}
/*
 * Disable capture on a device selected by name.
 * Validates the flags, then sends a DISABLE request for the given device
 * name. Returns the result of the flag check if it fails, otherwise the
 * result of the request.
 */
int
rte_pdump_disable_by_deviceid(char *device_id, uint16_t queue,
			      uint32_t flags)
{
	int rc = pdump_validate_flags(flags);

	if (rc < 0)
		return rc;

	/* A DISABLE request carries no ring, mempool, filter or snap length. */
	return pdump_prepare_client_request(device_id, queue, flags, 0,
					    DISABLE, NULL, NULL, NULL);
}
/*
 * Add the counters of the first nq queues of one port to *total.
 *
 * struct rte_pdump_stats is walked as an array of uint64_t counters, with
 * the number of counters derived from the structure size. Each per-queue
 * counter is read with a relaxed atomic load. *total is accumulated into,
 * not reset.
 */
static void
pdump_sum_stats(uint16_t port, uint16_t nq,
		struct rte_pdump_stats stats[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT],
		struct rte_pdump_stats *total)
{
	uint64_t *acc = (uint64_t *)total;
	uint16_t q;

	for (q = 0; q < nq; q++) {
		const uint64_t *src = (const uint64_t *)&stats[port][q];
		unsigned int c;

		for (c = 0; c < sizeof(*total) / sizeof(uint64_t); c++) {
			uint64_t counter;

			counter = __atomic_load_n(&src[c], __ATOMIC_RELAXED);
			acc[c] += counter;
		}
	}
}
int
rte_pdump_stats(uint16_t port, struct rte_pdump_stats *stats)
{
struct rte_eth_dev_info dev_info;
const struct rte_memzone *mz;
int ret;
memset(stats, 0, sizeof(*stats));
ret = rte_eth_dev_info_get(port, &dev_info);
if (ret != 0) {
PDUMP_LOG(ERR,
"Error during getting device (port %u) info: %s\n",
port, strerror(-ret));
return ret;
}
if (pdump_stats == NULL) {
if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
/* rte_pdump_init was not called */
PDUMP_LOG(ERR, "pdump stats not initialized\n");
rte_errno = EINVAL;
return -1;
}
/* secondary process looks up the memzone */
mz = rte_memzone_lookup(MZ_RTE_PDUMP_STATS);
if (mz == NULL) {
/* rte_pdump_init was not called in primary process?? */
PDUMP_LOG(ERR, "can not find pdump stats\n");
rte_errno = EINVAL;
return -1;
}
pdump_stats = mz->addr;
}
pdump_sum_stats(port, dev_info.nb_rx_queues, pdump_stats->rx, stats);
pdump_sum_stats(port, dev_info.nb_tx_queues, pdump_stats->tx, stats);
return 0;
}