bpf: introduce basic Rx/Tx filters

Introduce API to install BPF based filters on ethdev RX/TX path.
Current implementation is pure SW one, based on ethdev RX/TX
callback mechanism.

Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
Acked-by: Ferruh Yigit <ferruh.yigit@intel.com>
This commit is contained in:
Konstantin Ananyev 2018-05-10 11:23:07 +01:00 committed by Thomas Monjalon
parent cc752e43e0
commit a93ff62a89
6 changed files with 729 additions and 2 deletions

View File

@ -226,6 +226,8 @@ New Features
The BPF Library provides the ability to load and execute
Enhanced Berkeley Packet Filter (eBPF) within user-space dpdk application.
Also it introduces basic framework to load/unload BPF-based filters
on eth devices (right now only via SW RX/TX callbacks).
It also adds dependency on libelf.

View File

@ -24,6 +24,7 @@ LIBABIVER := 1
SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf.c
SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_exec.c
SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_load.c
SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_pkt.c
SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_validate.c
ifeq ($(CONFIG_RTE_LIBRTE_BPF_ELF),y)
SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_load_elf.c
@ -35,5 +36,6 @@ endif
# install header files
SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += bpf_def.h
SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += rte_bpf.h
SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += rte_bpf_ethdev.h
include $(RTE_SDK)/mk/rte.lib.mk

605
lib/librte_bpf/bpf_pkt.c Normal file
View File

@ -0,0 +1,605 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2018 Intel Corporation
*/
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <unistd.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/queue.h>
#include <rte_common.h>
#include <rte_byteorder.h>
#include <rte_malloc.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_cycles.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_mbuf.h>
#include <rte_ethdev.h>
#include <rte_bpf_ethdev.h>
#include "bpf_impl.h"
/*
* information about installed BPF rx/tx callback
*/
struct bpf_eth_cbi {
/* used by both data & control path */
uint32_t use; /*usage counter */
const struct rte_eth_rxtx_callback *cb; /* callback handle */
struct rte_bpf *bpf;
struct rte_bpf_jit jit;
/* used by control path only */
LIST_ENTRY(bpf_eth_cbi) link;
uint16_t port;
uint16_t queue;
} __rte_cache_aligned;
/*
* Odd number means that callback is used by datapath.
* Even number means that callback is not used by datapath.
*/
#define BPF_ETH_CBI_INUSE 1
/*
* List to manage RX/TX installed callbacks.
*/
LIST_HEAD(bpf_eth_cbi_list, bpf_eth_cbi);
enum {
BPF_ETH_RX,
BPF_ETH_TX,
BPF_ETH_NUM,
};
/*
* information about all installed BPF rx/tx callbacks
*/
struct bpf_eth_cbh {
rte_spinlock_t lock;
struct bpf_eth_cbi_list list;
uint32_t type;
};
static struct bpf_eth_cbh rx_cbh = {
.lock = RTE_SPINLOCK_INITIALIZER,
.list = LIST_HEAD_INITIALIZER(list),
.type = BPF_ETH_RX,
};
static struct bpf_eth_cbh tx_cbh = {
.lock = RTE_SPINLOCK_INITIALIZER,
.list = LIST_HEAD_INITIALIZER(list),
.type = BPF_ETH_TX,
};
/*
* Marks given callback as used by datapath.
*/
static __rte_always_inline void
bpf_eth_cbi_inuse(struct bpf_eth_cbi *cbi)
{
cbi->use++;
/* make sure no store/load reordering could happen */
rte_smp_mb();
}
/*
* Marks given callback list as not used by datapath.
*/
static __rte_always_inline void
bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
{
/* make sure all previous loads are completed */
rte_smp_rmb();
cbi->use++;
}
/*
* Waits till datapath finished using given callback.
*/
static void
bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
{
uint32_t nuse, puse;
/* make sure all previous loads and stores are completed */
rte_smp_mb();
puse = cbi->use;
/* in use, busy wait till current RX/TX iteration is finished */
if ((puse & BPF_ETH_CBI_INUSE) != 0) {
do {
rte_pause();
rte_compiler_barrier();
nuse = cbi->use;
} while (nuse == puse);
}
}
static void
bpf_eth_cbi_cleanup(struct bpf_eth_cbi *bc)
{
bc->bpf = NULL;
memset(&bc->jit, 0, sizeof(bc->jit));
}
static struct bpf_eth_cbi *
bpf_eth_cbh_find(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue)
{
struct bpf_eth_cbi *cbi;
LIST_FOREACH(cbi, &cbh->list, link) {
if (cbi->port == port && cbi->queue == queue)
break;
}
return cbi;
}
static struct bpf_eth_cbi *
bpf_eth_cbh_add(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue)
{
struct bpf_eth_cbi *cbi;
/* return an existing one */
cbi = bpf_eth_cbh_find(cbh, port, queue);
if (cbi != NULL)
return cbi;
cbi = rte_zmalloc(NULL, sizeof(*cbi), RTE_CACHE_LINE_SIZE);
if (cbi != NULL) {
cbi->port = port;
cbi->queue = queue;
LIST_INSERT_HEAD(&cbh->list, cbi, link);
}
return cbi;
}
/*
* BPF packet processing routinies.
*/
static inline uint32_t
apply_filter(struct rte_mbuf *mb[], const uint64_t rc[], uint32_t num,
uint32_t drop)
{
uint32_t i, j, k;
struct rte_mbuf *dr[num];
for (i = 0, j = 0, k = 0; i != num; i++) {
/* filter matches */
if (rc[i] != 0)
mb[j++] = mb[i];
/* no match */
else
dr[k++] = mb[i];
}
if (drop != 0) {
/* free filtered out mbufs */
for (i = 0; i != k; i++)
rte_pktmbuf_free(dr[i]);
} else {
/* copy filtered out mbufs beyond good ones */
for (i = 0; i != k; i++)
mb[j + i] = dr[i];
}
return j;
}
static inline uint32_t
pkt_filter_vm(const struct rte_bpf *bpf, struct rte_mbuf *mb[], uint32_t num,
uint32_t drop)
{
uint32_t i;
void *dp[num];
uint64_t rc[num];
for (i = 0; i != num; i++)
dp[i] = rte_pktmbuf_mtod(mb[i], void *);
rte_bpf_exec_burst(bpf, dp, rc, num);
return apply_filter(mb, rc, num, drop);
}
static inline uint32_t
pkt_filter_jit(const struct rte_bpf_jit *jit, struct rte_mbuf *mb[],
uint32_t num, uint32_t drop)
{
uint32_t i, n;
void *dp;
uint64_t rc[num];
n = 0;
for (i = 0; i != num; i++) {
dp = rte_pktmbuf_mtod(mb[i], void *);
rc[i] = jit->func(dp);
n += (rc[i] == 0);
}
if (n != 0)
num = apply_filter(mb, rc, num, drop);
return num;
}
static inline uint32_t
pkt_filter_mb_vm(const struct rte_bpf *bpf, struct rte_mbuf *mb[], uint32_t num,
uint32_t drop)
{
uint64_t rc[num];
rte_bpf_exec_burst(bpf, (void **)mb, rc, num);
return apply_filter(mb, rc, num, drop);
}
static inline uint32_t
pkt_filter_mb_jit(const struct rte_bpf_jit *jit, struct rte_mbuf *mb[],
uint32_t num, uint32_t drop)
{
uint32_t i, n;
uint64_t rc[num];
n = 0;
for (i = 0; i != num; i++) {
rc[i] = jit->func(mb[i]);
n += (rc[i] == 0);
}
if (n != 0)
num = apply_filter(mb, rc, num, drop);
return num;
}
/*
* RX/TX callbacks for raw data bpf.
*/
static uint16_t
bpf_rx_callback_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
struct rte_mbuf *pkt[], uint16_t nb_pkts,
__rte_unused uint16_t max_pkts, void *user_param)
{
struct bpf_eth_cbi *cbi;
uint16_t rc;
cbi = user_param;
bpf_eth_cbi_inuse(cbi);
rc = (cbi->cb != NULL) ?
pkt_filter_vm(cbi->bpf, pkt, nb_pkts, 1) :
nb_pkts;
bpf_eth_cbi_unuse(cbi);
return rc;
}
static uint16_t
bpf_rx_callback_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
struct rte_mbuf *pkt[], uint16_t nb_pkts,
__rte_unused uint16_t max_pkts, void *user_param)
{
struct bpf_eth_cbi *cbi;
uint16_t rc;
cbi = user_param;
bpf_eth_cbi_inuse(cbi);
rc = (cbi->cb != NULL) ?
pkt_filter_jit(&cbi->jit, pkt, nb_pkts, 1) :
nb_pkts;
bpf_eth_cbi_unuse(cbi);
return rc;
}
static uint16_t
bpf_tx_callback_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
{
struct bpf_eth_cbi *cbi;
uint16_t rc;
cbi = user_param;
bpf_eth_cbi_inuse(cbi);
rc = (cbi->cb != NULL) ?
pkt_filter_vm(cbi->bpf, pkt, nb_pkts, 0) :
nb_pkts;
bpf_eth_cbi_unuse(cbi);
return rc;
}
static uint16_t
bpf_tx_callback_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
{
struct bpf_eth_cbi *cbi;
uint16_t rc;
cbi = user_param;
bpf_eth_cbi_inuse(cbi);
rc = (cbi->cb != NULL) ?
pkt_filter_jit(&cbi->jit, pkt, nb_pkts, 0) :
nb_pkts;
bpf_eth_cbi_unuse(cbi);
return rc;
}
/*
* RX/TX callbacks for mbuf.
*/
static uint16_t
bpf_rx_callback_mb_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
struct rte_mbuf *pkt[], uint16_t nb_pkts,
__rte_unused uint16_t max_pkts, void *user_param)
{
struct bpf_eth_cbi *cbi;
uint16_t rc;
cbi = user_param;
bpf_eth_cbi_inuse(cbi);
rc = (cbi->cb != NULL) ?
pkt_filter_mb_vm(cbi->bpf, pkt, nb_pkts, 1) :
nb_pkts;
bpf_eth_cbi_unuse(cbi);
return rc;
}
static uint16_t
bpf_rx_callback_mb_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
struct rte_mbuf *pkt[], uint16_t nb_pkts,
__rte_unused uint16_t max_pkts, void *user_param)
{
struct bpf_eth_cbi *cbi;
uint16_t rc;
cbi = user_param;
bpf_eth_cbi_inuse(cbi);
rc = (cbi->cb != NULL) ?
pkt_filter_mb_jit(&cbi->jit, pkt, nb_pkts, 1) :
nb_pkts;
bpf_eth_cbi_unuse(cbi);
return rc;
}
static uint16_t
bpf_tx_callback_mb_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
{
struct bpf_eth_cbi *cbi;
uint16_t rc;
cbi = user_param;
bpf_eth_cbi_inuse(cbi);
rc = (cbi->cb != NULL) ?
pkt_filter_mb_vm(cbi->bpf, pkt, nb_pkts, 0) :
nb_pkts;
bpf_eth_cbi_unuse(cbi);
return rc;
}
static uint16_t
bpf_tx_callback_mb_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
{
struct bpf_eth_cbi *cbi;
uint16_t rc;
cbi = user_param;
bpf_eth_cbi_inuse(cbi);
rc = (cbi->cb != NULL) ?
pkt_filter_mb_jit(&cbi->jit, pkt, nb_pkts, 0) :
nb_pkts;
bpf_eth_cbi_unuse(cbi);
return rc;
}
static rte_rx_callback_fn
select_rx_callback(enum rte_bpf_arg_type type, uint32_t flags)
{
if (flags & RTE_BPF_ETH_F_JIT) {
if (type == RTE_BPF_ARG_PTR)
return bpf_rx_callback_jit;
else if (type == RTE_BPF_ARG_PTR_MBUF)
return bpf_rx_callback_mb_jit;
} else if (type == RTE_BPF_ARG_PTR)
return bpf_rx_callback_vm;
else if (type == RTE_BPF_ARG_PTR_MBUF)
return bpf_rx_callback_mb_vm;
return NULL;
}
static rte_tx_callback_fn
select_tx_callback(enum rte_bpf_arg_type type, uint32_t flags)
{
if (flags & RTE_BPF_ETH_F_JIT) {
if (type == RTE_BPF_ARG_PTR)
return bpf_tx_callback_jit;
else if (type == RTE_BPF_ARG_PTR_MBUF)
return bpf_tx_callback_mb_jit;
} else if (type == RTE_BPF_ARG_PTR)
return bpf_tx_callback_vm;
else if (type == RTE_BPF_ARG_PTR_MBUF)
return bpf_tx_callback_mb_vm;
return NULL;
}
/*
* helper function to perform BPF unload for given port/queue.
* have to introduce extra complexity (and possible slowdown) here,
* as right now there is no safe generic way to remove RX/TX callback
* while IO is active.
* Still don't free memory allocated for callback handle itself,
* again right now there is no safe way to do that without stopping RX/TX
* on given port/queue first.
*/
static void
bpf_eth_cbi_unload(struct bpf_eth_cbi *bc)
{
/* mark this cbi as empty */
bc->cb = NULL;
rte_smp_mb();
/* make sure datapath doesn't use bpf anymore, then destroy bpf */
bpf_eth_cbi_wait(bc);
rte_bpf_destroy(bc->bpf);
bpf_eth_cbi_cleanup(bc);
}
static void
bpf_eth_unload(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue)
{
struct bpf_eth_cbi *bc;
bc = bpf_eth_cbh_find(cbh, port, queue);
if (bc == NULL || bc->cb == NULL)
return;
if (cbh->type == BPF_ETH_RX)
rte_eth_remove_rx_callback(port, queue, bc->cb);
else
rte_eth_remove_tx_callback(port, queue, bc->cb);
bpf_eth_cbi_unload(bc);
}
__rte_experimental void
rte_bpf_eth_rx_unload(uint16_t port, uint16_t queue)
{
struct bpf_eth_cbh *cbh;
cbh = &rx_cbh;
rte_spinlock_lock(&cbh->lock);
bpf_eth_unload(cbh, port, queue);
rte_spinlock_unlock(&cbh->lock);
}
__rte_experimental void
rte_bpf_eth_tx_unload(uint16_t port, uint16_t queue)
{
struct bpf_eth_cbh *cbh;
cbh = &tx_cbh;
rte_spinlock_lock(&cbh->lock);
bpf_eth_unload(cbh, port, queue);
rte_spinlock_unlock(&cbh->lock);
}
static int
bpf_eth_elf_load(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue,
const struct rte_bpf_prm *prm, const char *fname, const char *sname,
uint32_t flags)
{
int32_t rc;
struct bpf_eth_cbi *bc;
struct rte_bpf *bpf;
rte_rx_callback_fn frx;
rte_tx_callback_fn ftx;
struct rte_bpf_jit jit;
frx = NULL;
ftx = NULL;
if (prm == NULL || rte_eth_dev_is_valid_port(port) == 0 ||
queue >= RTE_MAX_QUEUES_PER_PORT)
return -EINVAL;
if (cbh->type == BPF_ETH_RX)
frx = select_rx_callback(prm->prog_arg.type, flags);
else
ftx = select_tx_callback(prm->prog_arg.type, flags);
if (frx == NULL && ftx == NULL) {
RTE_BPF_LOG(ERR, "%s(%u, %u): no callback selected;\n",
__func__, port, queue);
return -EINVAL;
}
bpf = rte_bpf_elf_load(prm, fname, sname);
if (bpf == NULL)
return -rte_errno;
rte_bpf_get_jit(bpf, &jit);
if ((flags & RTE_BPF_ETH_F_JIT) != 0 && jit.func == NULL) {
RTE_BPF_LOG(ERR, "%s(%u, %u): no JIT generated;\n",
__func__, port, queue);
rte_bpf_destroy(bpf);
return -ENOTSUP;
}
/* setup/update global callback info */
bc = bpf_eth_cbh_add(cbh, port, queue);
if (bc == NULL)
return -ENOMEM;
/* remove old one, if any */
if (bc->cb != NULL)
bpf_eth_unload(cbh, port, queue);
bc->bpf = bpf;
bc->jit = jit;
if (cbh->type == BPF_ETH_RX)
bc->cb = rte_eth_add_rx_callback(port, queue, frx, bc);
else
bc->cb = rte_eth_add_tx_callback(port, queue, ftx, bc);
if (bc->cb == NULL) {
rc = -rte_errno;
rte_bpf_destroy(bpf);
bpf_eth_cbi_cleanup(bc);
} else
rc = 0;
return rc;
}
__rte_experimental int
rte_bpf_eth_rx_elf_load(uint16_t port, uint16_t queue,
const struct rte_bpf_prm *prm, const char *fname, const char *sname,
uint32_t flags)
{
int32_t rc;
struct bpf_eth_cbh *cbh;
cbh = &rx_cbh;
rte_spinlock_lock(&cbh->lock);
rc = bpf_eth_elf_load(cbh, port, queue, prm, fname, sname, flags);
rte_spinlock_unlock(&cbh->lock);
return rc;
}
__rte_experimental int
rte_bpf_eth_tx_elf_load(uint16_t port, uint16_t queue,
const struct rte_bpf_prm *prm, const char *fname, const char *sname,
uint32_t flags)
{
int32_t rc;
struct bpf_eth_cbh *cbh;
cbh = &tx_cbh;
rte_spinlock_lock(&cbh->lock);
rc = bpf_eth_elf_load(cbh, port, queue, prm, fname, sname, flags);
rte_spinlock_unlock(&cbh->lock);
return rc;
}

View File

@ -5,6 +5,7 @@ allow_experimental_apis = true
sources = files('bpf.c',
'bpf_exec.c',
'bpf_load.c',
'bpf_pkt.c',
'bpf_validate.c')
if arch_subdir == 'x86'
@ -12,9 +13,10 @@ if arch_subdir == 'x86'
endif
install_headers = files('bpf_def.h',
'rte_bpf.h')
'rte_bpf.h',
'rte_bpf_ethdev.h')
deps += ['mbuf', 'net']
deps += ['mbuf', 'net', 'ethdev']
dep = cc.find_library('elf', required: false)
if dep.found() == true and cc.has_header('libelf.h', dependencies: dep)

View File

@ -0,0 +1,112 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2018 Intel Corporation
*/
#ifndef _RTE_BPF_ETHDEV_H_
#define _RTE_BPF_ETHDEV_H_
/**
* @file
*
* API to install BPF filter as RX/TX callbacks for eth devices.
* Note that right now:
* - it is not MT safe, i.e. it is not allowed to do load/unload for the
* same port/queue from different threads in parallel.
* - though it allows to do load/unload at runtime
* (while RX/TX is ongoing on given port/queue).
* - allows only one BPF program per port/queue,
* i.e. new load will replace previously loaded for that port/queue BPF program.
* Filter behaviour - if BPF program returns zero value for a given packet,
* then it will be dropped inside callback and no further processing
* on RX - it will be dropped inside callback and no further processing
* for that packet will happen.
* on TX - packet will remain unsent, and it is responsibility of the user
* to handle such situation (drop, try to send again, etc.).
*/
#include <rte_bpf.h>
#ifdef __cplusplus
extern "C" {
#endif
enum {
RTE_BPF_ETH_F_NONE = 0,
RTE_BPF_ETH_F_JIT = 0x1, /*< use compiled into native ISA code */
};
/**
* Unload previously loaded BPF program (if any) from given RX port/queue
* and remove appropriate RX port/queue callback.
*
* @param port
* The identifier of the ethernet port
* @param queue
* The identifier of the RX queue on the given port
*/
void rte_bpf_eth_rx_unload(uint16_t port, uint16_t queue);
/**
* Unload previously loaded BPF program (if any) from given TX port/queue
* and remove appropriate TX port/queue callback.
*
* @param port
* The identifier of the ethernet port
* @param queue
* The identifier of the TX queue on the given port
*/
void rte_bpf_eth_tx_unload(uint16_t port, uint16_t queue);
/**
* Load BPF program from the ELF file and install callback to execute it
* on given RX port/queue.
*
* @param port
* The identifier of the ethernet port
* @param queue
* The identifier of the RX queue on the given port
* @param fname
* Pathname for a ELF file.
* @param sname
* Name of the executable section within the file to load.
* @param prm
* Parameters used to create and initialise the BPF exeution context.
* @param flags
* Flags that define expected expected behavior of the loaded filter
* (i.e. jited/non-jited version to use).
* @return
* Zero on successful completion or negative error code otherwise.
*/
int rte_bpf_eth_rx_elf_load(uint16_t port, uint16_t queue,
const struct rte_bpf_prm *prm, const char *fname, const char *sname,
uint32_t flags);
/**
* Load BPF program from the ELF file and install callback to execute it
* on given TX port/queue.
*
* @param port
* The identifier of the ethernet port
* @param queue
* The identifier of the TX queue on the given port
* @param fname
* Pathname for a ELF file.
* @param sname
* Name of the executable section within the file to load.
* @param prm
* Parameters used to create and initialise the BPF exeution context.
* @param flags
* Flags that define expected expected behavior of the loaded filter
* (i.e. jited/non-jited version to use).
* @return
* Zero on successful completion or negative error code otherwise.
*/
int rte_bpf_eth_tx_elf_load(uint16_t port, uint16_t queue,
const struct rte_bpf_prm *prm, const char *fname, const char *sname,
uint32_t flags);
#ifdef __cplusplus
}
#endif
#endif /* _RTE_BPF_ETHDEV_H_ */

View File

@ -3,6 +3,10 @@ EXPERIMENTAL {
rte_bpf_destroy;
rte_bpf_elf_load;
rte_bpf_eth_rx_elf_load;
rte_bpf_eth_rx_unload;
rte_bpf_eth_tx_elf_load;
rte_bpf_eth_tx_unload;
rte_bpf_exec;
rte_bpf_exec_burst;
rte_bpf_get_jit;