lib/gro: add Generic Receive Offload API framework

Generic Receive Offload (GRO) is a widely used SW-based offloading
technique to reduce per-packet processing overhead. It gains
performance by reassembling small packets into large ones. This
patchset adds GRO support to DPDK; this patch implements the GRO API
framework.

To enable more flexibility to applications, DPDK GRO is implemented as
a user library. Applications explicitly use the GRO library to merge
small packets into large ones. DPDK GRO provides two reassembly modes.
One is called lightweight mode, the other is called heavyweight mode.
If applications want to merge packets in a simple way and the number
of packets is relatively small, they can use the lightweight mode.
If applications need more fine-grained controls, they can choose the
heavyweight mode.

rte_gro_reassemble_burst is the main reassembly API which is used in
lightweight mode and processes N packets at a time. For applications,
performing GRO in lightweight mode is simple. They just need to invoke
rte_gro_reassemble_burst. Applications can get GROed packets as soon as
rte_gro_reassemble_burst returns.

rte_gro_reassemble is the main reassembly API which is used in
heavyweight mode and tries to merge N inputted packets with the packets
in GRO reassembly tables. For applications, performing GRO in heavyweight
mode is relatively complicated. Before performing GRO, applications need
to create a GRO context object, which keeps reassembly tables of
desired GRO types, by rte_gro_ctx_create. Then applications can use
rte_gro_reassemble to merge packets. The GROed packets are kept in the
reassembly tables of the GRO context object. To retrieve them,
applications need to flush them explicitly with the flush API.

Signed-off-by: Jiayu Hu <jiayu.hu@intel.com>
Reviewed-by: Jianfeng Tan <jianfeng.tan@intel.com>
This commit is contained in:
Jiayu Hu 2017-07-09 13:46:44 +08:00 committed by Thomas Monjalon
parent a76f6b1b09
commit e996506a1c
11 changed files with 454 additions and 0 deletions

View File

@ -615,6 +615,10 @@ F: doc/guides/sample_app_ug/ip_frag.rst
F: examples/ip_reassembly/
F: doc/guides/sample_app_ug/ip_reassembly.rst
Generic Receive Offload - EXPERIMENTAL
M: Jiayu Hu <jiayu.hu@intel.com>
F: lib/librte_gro/
Distributor
M: Bruce Richardson <bruce.richardson@intel.com>
M: David Hunt <david.hunt@intel.com>

View File

@ -640,6 +640,11 @@ CONFIG_RTE_LIBRTE_IP_FRAG_DEBUG=n
CONFIG_RTE_LIBRTE_IP_FRAG_MAX_FRAG=4
CONFIG_RTE_LIBRTE_IP_FRAG_TBL_STAT=n
#
# Compile GRO library
#
CONFIG_RTE_LIBRTE_GRO=y
#
# Compile librte_meter
#

View File

@ -95,6 +95,7 @@ There are many libraries, so their headers may be grouped by topics:
[SCTP] (@ref rte_sctp.h),
[TCP] (@ref rte_tcp.h),
[UDP] (@ref rte_udp.h),
[GRO] (@ref rte_gro.h),
[frag/reass] (@ref rte_ip_frag.h),
[LPM IPv4 route] (@ref rte_lpm.h),
[LPM IPv6 route] (@ref rte_lpm6.h),

View File

@ -46,6 +46,7 @@ INPUT = doc/api/doxy-api-index.md \
lib/librte_efd \
lib/librte_ether \
lib/librte_eventdev \
lib/librte_gro \
lib/librte_hash \
lib/librte_ip_frag \
lib/librte_jobstats \

View File

@ -317,6 +317,7 @@ The libraries prepended with a plus sign were incremented in this version.
librte_distributor.so.1
librte_eal.so.4
librte_ethdev.so.6
+ librte_gro.so.1
librte_hash.so.2
librte_ip_frag.so.1
librte_jobstats.so.1

View File

@ -68,6 +68,8 @@ DEPDIRS-librte_net := librte_mbuf librte_eal
DIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += librte_ip_frag
DEPDIRS-librte_ip_frag := librte_eal librte_mempool librte_mbuf librte_ether
DEPDIRS-librte_ip_frag += librte_hash
DIRS-$(CONFIG_RTE_LIBRTE_GRO) += librte_gro
DEPDIRS-librte_gro := librte_eal librte_mbuf librte_ether librte_net
DIRS-$(CONFIG_RTE_LIBRTE_JOBSTATS) += librte_jobstats
DEPDIRS-librte_jobstats := librte_eal
DIRS-$(CONFIG_RTE_LIBRTE_METRICS) += librte_metrics

50
lib/librte_gro/Makefile Normal file
View File

@ -0,0 +1,50 @@
# BSD LICENSE
#
# Copyright(c) 2017 Intel Corporation. All rights reserved.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Build description for the GRO library (librte_gro).
# NOTE(review): rte.vars.mk presumably provides the common build
# variables used below (WERROR_FLAGS, SRCDIR, ...) - confirm.
include $(RTE_SDK)/mk/rte.vars.mk
# library name
LIB = librte_gro.a
# build with -O3, the WERROR_FLAGS set, and the library's own source
# directory on the include path
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
# symbol version map and ABI version of the library
EXPORT_MAP := rte_gro_version.map
LIBABIVER := 1
# source files (only added when CONFIG_RTE_LIBRTE_GRO is enabled)
SRCS-$(CONFIG_RTE_LIBRTE_GRO) += rte_gro.c
# install this header file
SYMLINK-$(CONFIG_RTE_LIBRTE_GRO)-include += rte_gro.h
# NOTE(review): rte.lib.mk presumably holds the generic library build
# rules - confirm.
include $(RTE_SDK)/mk/rte.lib.mk

169
lib/librte_gro/rte_gro.c Normal file
View File

@ -0,0 +1,169 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2017 Intel Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include "rte_gro.h"
/*
 * Per-GRO-type reassembly table operations:
 * - create: takes a socket id, a max flow number and a max item number
 *   per flow, and returns an opaque table pointer (rte_gro_ctx_create()
 *   treats a NULL return as failure);
 * - destroy: takes the opaque table pointer;
 * - pkt_count: takes the opaque table pointer and returns a 32-bit count.
 */
typedef void *(*gro_tbl_create_fn)(uint16_t socket_id,
uint16_t max_flow_num,
uint16_t max_item_per_flow);
typedef void (*gro_tbl_destroy_fn)(void *tbl);
typedef uint32_t (*gro_tbl_pkt_count_fn)(void *tbl);
/*
 * Callback tables indexed by GRO type, i.e. by the bit position of the
 * type in a gro_types mask. No initialisers are given, so as static
 * objects every slot starts out NULL; the functions in this file skip
 * NULL slots.
 */
static gro_tbl_create_fn tbl_create_fn[RTE_GRO_TYPE_MAX_NUM];
static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM];
static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM];
/*
 * GRO context structure, which is used to merge packets. It keeps one
 * reassembly table slot per possible GRO type. Applications need to
 * create GRO context objects (rte_gro_ctx_create) before using
 * rte_gro_reassemble to perform GRO. Outside this file the context is
 * only handled as an opaque void pointer.
 */
struct gro_ctx {
/* bitmask of the GRO types to perform; bit i corresponds to tbls[i] */
uint64_t gro_types;
/* reassembly tables, indexed by GRO type; each entry is an opaque
 * pointer returned by that type's create callback
 */
void *tbls[RTE_GRO_TYPE_MAX_NUM];
};
/*
 * Create a GRO context object holding one reassembly table for every
 * requested GRO type that has a registered create callback.
 *
 * param
 *   creation parameters: the desired GRO type bitmask, the table
 *   dimensions and the socket to allocate from. NULL is rejected.
 *
 * Returns a pointer to the new context, or NULL if param is NULL, the
 * context cannot be allocated, or any reassembly table fails to be
 * created (in which case every table created so far is destroyed).
 */
void *
rte_gro_ctx_create(const struct rte_gro_param *param)
{
	struct gro_ctx *gro_ctx;
	gro_tbl_create_fn create_tbl_fn;
	uint64_t gro_type_flag = 0;
	uint64_t gro_types = 0;
	uint8_t i;

	if (param == NULL)
		return NULL;

	gro_ctx = rte_zmalloc_socket(__func__,
			sizeof(*gro_ctx),
			RTE_CACHE_LINE_SIZE,
			param->socket_id);
	if (gro_ctx == NULL)
		return NULL;

	for (i = 0; i < RTE_GRO_TYPE_MAX_NUM; i++) {
		/*
		 * Build the flag from a 64-bit constant: i walks every bit
		 * of the 64-bit gro_types mask, and shifting a plain int
		 * that far is undefined behaviour.
		 */
		gro_type_flag = 1ULL << i;
		if ((param->gro_types & gro_type_flag) == 0)
			continue;

		/* types without a registered create callback get no table */
		create_tbl_fn = tbl_create_fn[i];
		if (create_tbl_fn == NULL)
			continue;

		gro_ctx->tbls[i] = create_tbl_fn(param->socket_id,
				param->max_flow_num,
				param->max_item_per_flow);
		if (gro_ctx->tbls[i] == NULL) {
			/*
			 * destroy all created tables: expose only the types
			 * created so far, so that rte_gro_ctx_destroy() does
			 * not touch the slot that just failed.
			 */
			gro_ctx->gro_types = gro_types;
			rte_gro_ctx_destroy(gro_ctx);
			return NULL;
		}
		gro_types |= gro_type_flag;
	}

	/*
	 * The context records the mask exactly as requested, including
	 * types for which no table was created above.
	 */
	gro_ctx->gro_types = param->gro_types;

	return gro_ctx;
}
/*
 * Destroy a GRO context object: call the registered destroy callback on
 * the table of every GRO type enabled in the context, then free the
 * context itself.
 *
 * ctx
 *   pointer to a GRO context object; NULL is accepted and ignored.
 */
void
rte_gro_ctx_destroy(void *ctx)
{
	gro_tbl_destroy_fn destroy_tbl_fn;
	struct gro_ctx *gro_ctx = ctx;
	uint64_t gro_type_flag;
	uint8_t i;

	if (gro_ctx == NULL)
		return;

	for (i = 0; i < RTE_GRO_TYPE_MAX_NUM; i++) {
		/*
		 * 64-bit constant: i walks every bit of the 64-bit
		 * gro_types mask, and shifting a plain int that far is
		 * undefined behaviour.
		 */
		gro_type_flag = 1ULL << i;
		if ((gro_ctx->gro_types & gro_type_flag) == 0)
			continue;

		/* types without a registered destroy callback are skipped */
		destroy_tbl_fn = tbl_destroy_fn[i];
		if (destroy_tbl_fn)
			destroy_tbl_fn(gro_ctx->tbls[i]);
	}

	rte_free(gro_ctx);
}
/*
 * Burst reassembly entry point. Placeholder implementation: pkts and
 * param are not used, no packet is merged, and nb_pkts is returned
 * unchanged.
 */
uint16_t
rte_gro_reassemble_burst(struct rte_mbuf **pkts __rte_unused,
uint16_t nb_pkts,
const struct rte_gro_param *param __rte_unused)
{
return nb_pkts;
}
/*
 * Context-based reassembly entry point. Placeholder implementation:
 * pkts and ctx are not used, nothing is inserted into any reassembly
 * table, and nb_pkts is returned unchanged.
 */
uint16_t
rte_gro_reassemble(struct rte_mbuf **pkts __rte_unused,
uint16_t nb_pkts,
void *ctx __rte_unused)
{
return nb_pkts;
}
/*
 * Timeout flush entry point. Placeholder implementation: every argument
 * is unused, nothing is written to out, and 0 is always returned.
 */
uint16_t
rte_gro_timeout_flush(void *ctx __rte_unused,
uint64_t timeout_cycles __rte_unused,
uint64_t gro_types __rte_unused,
struct rte_mbuf **out __rte_unused,
uint16_t max_nb_out __rte_unused)
{
return 0;
}
/*
 * Return the total number of packets held in the reassembly tables of a
 * GRO context, summed over every enabled GRO type that has a registered
 * packet-count callback.
 *
 * ctx
 *   pointer to a GRO context object; a NULL context holds no packets
 *   and yields 0.
 */
uint64_t
rte_gro_get_pkt_count(void *ctx)
{
	struct gro_ctx *gro_ctx = ctx;
	gro_tbl_pkt_count_fn pkt_count_fn;
	uint64_t item_num = 0;
	uint64_t gro_type_flag;
	uint8_t i;

	if (gro_ctx == NULL)
		return 0;

	for (i = 0; i < RTE_GRO_TYPE_MAX_NUM; i++) {
		/*
		 * 64-bit constant: i walks every bit of the 64-bit
		 * gro_types mask, and shifting a plain int that far is
		 * undefined behaviour.
		 */
		gro_type_flag = 1ULL << i;
		if ((gro_ctx->gro_types & gro_type_flag) == 0)
			continue;

		/* types without a registered count callback contribute 0 */
		pkt_count_fn = tbl_pkt_count_fn[i];
		if (pkt_count_fn == NULL)
			continue;

		item_num += pkt_count_fn(gro_ctx->tbls[i]);
	}

	return item_num;
}

208
lib/librte_gro/rte_gro.h Normal file
View File

@ -0,0 +1,208 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2017 Intel Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _RTE_GRO_H_
#define _RTE_GRO_H_

/**
 * @file
 * Interface to the Generic Receive Offload (GRO) library.
 */

/* fixed-width integer types used throughout this API */
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Only pointers to mbufs appear in this API, so a forward declaration
 * is enough to make the header usable on its own.
 */
struct rte_mbuf;
/**
 * the max packets number that rte_gro_reassemble_burst()
 * can process in each invocation.
 */
#define RTE_GRO_MAX_BURST_ITEM_NUM 128U
/**
 * max number of supported GRO types; GRO types are selected through
 * 64-bit masks (see the gro_types fields and parameters).
 */
#define RTE_GRO_TYPE_MAX_NUM 64
/** current supported GRO num */
#define RTE_GRO_TYPE_SUPPORT_NUM 0
/**
 * Parameters that applications pass to rte_gro_ctx_create() and
 * rte_gro_reassemble_burst().
 */
struct rte_gro_param {
/** desired GRO types (bitmask, one bit per GRO type) */
uint64_t gro_types;
/** max flow number */
uint16_t max_flow_num;
/** max packet number per flow */
uint16_t max_item_per_flow;
/**
 * socket index for allocating GRO related data structures,
 * like reassembly tables. When using rte_gro_reassemble_burst(),
 * applications don't need to set this value.
 */
uint16_t socket_id;
};
/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * This function creates a GRO context object, which is used to merge
 * packets in rte_gro_reassemble().
 *
 * @param param
 *  parameters needed to create the GRO context object: the desired GRO
 *  types, the table dimensions and the socket to allocate from.
 *
 * @return
 *  on success, a pointer to the GRO context object. Otherwise, NULL.
 */
void *rte_gro_ctx_create(const struct rte_gro_param *param);
/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * This function destroys a GRO context object, including the reassembly
 * tables it keeps.
 *
 * @param ctx
 *  pointer to a GRO context object. A NULL pointer is ignored.
 */
void rte_gro_ctx_destroy(void *ctx);
/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * This is one of the main reassembly APIs, which merges a number of
 * packets at a time. It assumes that all inputted packets have
 * correct checksums. That is, applications should guarantee all
 * inputted packets are correct. Besides, it doesn't re-calculate
 * checksums for merged packets. If inputted packets are IP fragmented,
 * this function assumes they are complete (i.e. with L4 header). After
 * finishing processing, it returns all GROed packets to applications
 * immediately.
 *
 * @param pkts
 *  a pointer array which points to the packets to reassemble. Besides,
 *  it keeps packet addresses for GROed packets.
 * @param nb_pkts
 *  the number of packets to reassemble. RTE_GRO_MAX_BURST_ITEM_NUM is
 *  the max number of packets that can be processed per invocation.
 * @param param
 *  applications use it to tell rte_gro_reassemble_burst() what rules
 *  are demanded.
 *
 * @return
 *  the number of packets after being GROed. If no packets are merged,
 *  the returned value is nb_pkts.
 */
uint16_t rte_gro_reassemble_burst(struct rte_mbuf **pkts,
uint16_t nb_pkts,
const struct rte_gro_param *param);
/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * Reassembly function, which tries to merge inputted packets with
 * the packets in the reassembly tables of a given GRO context. This
 * function assumes all inputted packets have correct checksums.
 * And it won't update checksums if two packets are merged. Besides,
 * if inputted packets are IP fragmented, this function assumes they
 * are complete packets (i.e. with L4 header).
 *
 * If the inputted packets don't have data or are with unsupported GRO
 * types etc., they won't be processed and are returned to applications.
 * Otherwise, the inputted packets are either merged or inserted into
 * the table. If applications want to get packets in the table, they
 * need to call the flush API.
 *
 * @param pkts
 *  packets to reassemble. Besides, after this function finishes, it
 *  keeps the unprocessed packets (e.g. without data or unsupported
 *  GRO types).
 * @param nb_pkts
 *  the number of packets to reassemble.
 * @param ctx
 *  a pointer to a GRO context object.
 *
 * @return
 *  the number of unprocessed packets (e.g. without data or
 *  unsupported GRO types). If all packets are processed (merged or
 *  inserted into the table), return 0.
 */
uint16_t rte_gro_reassemble(struct rte_mbuf **pkts,
uint16_t nb_pkts,
void *ctx);
/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * This function flushes the timeout packets from reassembly tables of
 * desired GRO types. The max number of flushed timeout packets is the
 * element number of the array which is used to keep the flushed packets.
 *
 * Besides, this function won't re-calculate checksums for merged
 * packets in the tables. That is, the returned packets may be with
 * wrong checksums.
 *
 * @param ctx
 *  a pointer to a GRO context object.
 * @param timeout_cycles
 *  max TTL for packets in reassembly tables, documented as measured in
 *  nanoseconds. NOTE(review): the parameter name suggests a cycle
 *  count instead - confirm the unit.
 * @param gro_types
 *  this function only flushes packets which belong to the GRO types
 *  specified by gro_types.
 * @param out
 *  a pointer array that is used to keep flushed timeout packets.
 * @param max_nb_out
 *  the element number of out. It's also the max number of timeout
 *  packets that can be flushed finally.
 *
 * @return
 *  the number of flushed packets. If no packets are flushed, return 0.
 */
uint16_t rte_gro_timeout_flush(void *ctx,
uint64_t timeout_cycles,
uint64_t gro_types,
struct rte_mbuf **out,
uint16_t max_nb_out);
/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * This function returns the number of packets in all reassembly tables
 * of a given GRO context.
 *
 * @param ctx
 *  pointer to a GRO context object.
 *
 * @return
 *  the number of packets in all reassembly tables.
 */
uint64_t rte_gro_get_pkt_count(void *ctx);
#ifdef __cplusplus
}
#endif
#endif /* _RTE_GRO_H_ */

View File

@ -0,0 +1,12 @@
# Exported symbols of librte_gro; every name must match a function
# defined by the library (rte_gro.c), everything else stays local.
DPDK_17.08 {
	global:

	rte_gro_ctx_create;
	rte_gro_ctx_destroy;
	rte_gro_get_pkt_count;
	rte_gro_reassemble;
	rte_gro_reassemble_burst;
	rte_gro_timeout_flush;

	local: *;
};

View File

@ -65,6 +65,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_PORT) += -lrte_port
_LDLIBS-$(CONFIG_RTE_LIBRTE_PDUMP) += -lrte_pdump
_LDLIBS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += -lrte_distributor
_LDLIBS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += -lrte_ip_frag
_LDLIBS-$(CONFIG_RTE_LIBRTE_GRO) += -lrte_gro
_LDLIBS-$(CONFIG_RTE_LIBRTE_METER) += -lrte_meter
_LDLIBS-$(CONFIG_RTE_LIBRTE_SCHED) += -lrte_sched
_LDLIBS-$(CONFIG_RTE_LIBRTE_LPM) += -lrte_lpm