From 7272f9cd774c4643a33d92db6cc3e1641ceab5c9 Mon Sep 17 00:00:00 2001 From: Hans Petter Selasky Date: Fri, 6 Dec 2019 15:36:32 +0000 Subject: [PATCH] Implement hardware TLS via send tags for mlx5en(4), which is supported by ConnectX-6 DX. Currently TLS v1.2 and v1.3 with AES 128/256 crypto over TCP/IP (v4 and v6) are supported. A per-PCI-device UMA zone is used to manage the memory of the send tags. To optimize performance, some crypto contexts may be cached by the UMA zone until the UMA zone finalizes the memory of the given send tag. An asynchronous task is used to manage the setup of the send tags towards the firmware, most importantly setting the AES 128/256-bit pre-shared keys for the crypto context. Updating the state of the AES crypto engine and encrypting data are both done in the fast path. Each send tag tracks the TCP sequence number in order to detect non-contiguous blocks of data, which may require a dump of prior unencrypted data to restore the crypto state prior to wire transmission. Statistics counters have been added to count the amount of TLS data transmitted in total, and the amount of TLS data which has been dumped prior to transmission. When non-contiguous TCP sequence numbers are detected, the software needs to dump the beginning of the current TLS record up until the point of retransmission. All TLS counters utilize the counter(9) API. 
In order to enable hardware TLS offload the following sysctls must be set: kern.ipc.mb_use_ext_pgs=1 kern.ipc.tls.ifnet.permitted=1 kern.ipc.tls.enable=1 Sponsored by: Mellanox Technologies --- sys/conf/files | 2 + sys/dev/mlx5/device.h | 1 + sys/dev/mlx5/mlx5_en/en.h | 30 +- sys/dev/mlx5/mlx5_en/en_hw_tls.h | 104 +++ sys/dev/mlx5/mlx5_en/en_rl.h | 1 + sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c | 20 + sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c | 834 +++++++++++++++++++++++++ sys/dev/mlx5/mlx5_en/mlx5_en_main.c | 75 ++- sys/dev/mlx5/mlx5_en/mlx5_en_rl.c | 17 +- sys/dev/mlx5/mlx5_en/mlx5_en_tx.c | 372 ++++++++--- sys/modules/mlx5en/Makefile | 4 +- 11 files changed, 1372 insertions(+), 88 deletions(-) create mode 100644 sys/dev/mlx5/mlx5_en/en_hw_tls.h create mode 100644 sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c diff --git a/sys/conf/files b/sys/conf/files index 390c900818ed..4bd31d1aa174 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -4781,6 +4781,8 @@ dev/mlx5/mlx5_en/mlx5_en_tx.c optional mlx5en pci inet inet6 \ compile-with "${OFED_C}" dev/mlx5/mlx5_en/mlx5_en_flow_table.c optional mlx5en pci inet inet6 \ compile-with "${OFED_C}" +dev/mlx5/mlx5_en/mlx5_en_hw_tls.c optional mlx5en pci inet inet6 \ + compile-with "${OFED_C}" dev/mlx5/mlx5_en/mlx5_en_rx.c optional mlx5en pci inet inet6 \ compile-with "${OFED_C}" dev/mlx5/mlx5_en/mlx5_en_rl.c optional mlx5en pci inet inet6 \ diff --git a/sys/dev/mlx5/device.h b/sys/dev/mlx5/device.h index 59ce900fc19b..a4dfb6e8ce0d 100644 --- a/sys/dev/mlx5/device.h +++ b/sys/dev/mlx5/device.h @@ -361,6 +361,7 @@ enum { MLX5_OPCODE_ATOMIC_MASKED_FA = 0x15, MLX5_OPCODE_BIND_MW = 0x18, MLX5_OPCODE_CONFIG_CMD = 0x1f, + MLX5_OPCODE_DUMP = 0x23, MLX5_RECV_OPCODE_RDMA_WRITE_IMM = 0x00, MLX5_RECV_OPCODE_SEND = 0x01, diff --git a/sys/dev/mlx5/mlx5_en/en.h b/sys/dev/mlx5/mlx5_en/en.h index 28b1a6ca9793..3617b539f111 100644 --- a/sys/dev/mlx5/mlx5_en/en.h +++ b/sys/dev/mlx5/mlx5_en/en.h @@ -53,6 +53,7 @@ #include #include #include +#include #include 
"opt_rss.h" @@ -167,6 +168,7 @@ typedef void (mlx5e_cq_comp_t)(struct mlx5_core_cq *); #define MLX5E_STATS_COUNT(a, ...) a #define MLX5E_STATS_VAR(a, b, c, ...) b c; +#define MLX5E_STATS_COUNTER(a, b, c, ...) counter_##b##_t c; #define MLX5E_STATS_DESC(a, b, c, d, e, ...) d, e, #define MLX5E_VPORT_STATS(m) \ @@ -724,6 +726,7 @@ struct mlx5e_params_ethtool { u8 fec_avail_10x_25x[MLX5E_MAX_FEC_10X_25X]; u16 fec_avail_50x[MLX5E_MAX_FEC_50X]; u32 fec_mode_active; + u32 hw_mtu_msb; }; struct mlx5e_cq { @@ -775,6 +778,7 @@ struct mlx5e_rq { struct mlx5e_sq_mbuf { bus_dmamap_t dma_map; struct mbuf *mbuf; + volatile s32 *p_refcount; /* in use refcount, if any */ u32 num_bytes; u32 num_wqebbs; }; @@ -959,9 +963,14 @@ struct mlx5e_flow_tables { struct mlx5e_flow_table inner_rss; }; -#ifdef RATELIMIT +struct mlx5e_xmit_args { + volatile s32 *pref; + u32 tisn; + u16 ihs; +}; + #include "en_rl.h" -#endif +#include "en_hw_tls.h" #define MLX5E_TSTMP_PREC 10 @@ -1035,9 +1044,10 @@ struct mlx5e_priv { int media_active_last; struct callout watchdog; -#ifdef RATELIMIT + struct mlx5e_rl_priv_data rl; -#endif + + struct mlx5e_tls tls; struct callout tstmp_clbr; int clbr_done; @@ -1092,6 +1102,8 @@ struct mlx5e_eeprom { #define MLX5E_FLD_MAX(typ, fld) ((1ULL << __mlx5_bit_sz(typ, fld)) - 1ULL) +bool mlx5e_do_send_cqe(struct mlx5e_sq *); +int mlx5e_get_full_header_size(struct mbuf *, struct tcphdr **); int mlx5e_xmit(struct ifnet *, struct mbuf *); int mlx5e_open_locked(struct ifnet *); @@ -1163,7 +1175,12 @@ void mlx5e_create_ethtool(struct mlx5e_priv *); void mlx5e_create_stats(struct sysctl_ctx_list *, struct sysctl_oid_list *, const char *, const char **, unsigned, u64 *); +void mlx5e_create_counter_stats(struct sysctl_ctx_list *, + struct sysctl_oid_list *, const char *, + const char **, unsigned, counter_u64_t *); void mlx5e_send_nop(struct mlx5e_sq *, u32); +int mlx5e_sq_dump_xmit(struct mlx5e_sq *, struct mlx5e_xmit_args *, struct mbuf **); +int mlx5e_sq_xmit(struct mlx5e_sq *, 
struct mbuf **); void mlx5e_sq_cev_timeout(void *); int mlx5e_refresh_channel_params(struct mlx5e_priv *); int mlx5e_open_cq(struct mlx5e_priv *, struct mlx5e_cq_param *, @@ -1183,4 +1200,9 @@ void mlx5e_refresh_sq_inline(struct mlx5e_priv *priv); int mlx5e_update_buf_lossy(struct mlx5e_priv *priv); int mlx5e_fec_update(struct mlx5e_priv *priv); +if_snd_tag_alloc_t mlx5e_ul_snd_tag_alloc; +if_snd_tag_modify_t mlx5e_ul_snd_tag_modify; +if_snd_tag_query_t mlx5e_ul_snd_tag_query; +if_snd_tag_free_t mlx5e_ul_snd_tag_free; + #endif /* _MLX5_EN_H_ */ diff --git a/sys/dev/mlx5/mlx5_en/en_hw_tls.h b/sys/dev/mlx5/mlx5_en/en_hw_tls.h new file mode 100644 index 000000000000..6faf5677e211 --- /dev/null +++ b/sys/dev/mlx5/mlx5_en/en_hw_tls.h @@ -0,0 +1,104 @@ +/*- + * Copyright (c) 2019 Mellanox Technologies. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _MLX5_TLS_H_ +#define _MLX5_TLS_H_ + +#include + +#define MLX5E_TLS_TAG_LOCK(tag) mtx_lock(&(tag)->mtx) +#define MLX5E_TLS_TAG_UNLOCK(tag) mtx_unlock(&(tag)->mtx) + +#define MLX5E_TLS_STAT_INC(tag, field, num) \ + counter_u64_add((tag)->tls->stats.field, num) + +enum { + MLX5E_TLS_LOOP = 0, + MLX5E_TLS_FAILURE = 1, + MLX5E_TLS_DEFERRED = 2, + MLX5E_TLS_CONTINUE = 3, +}; + +struct mlx5e_tls_tag { + struct mlx5e_snd_tag tag; + STAILQ_ENTRY(mlx5e_tls_tag) entry; + volatile s32 refs; /* number of pending mbufs */ + uint32_t tisn; /* HW TIS context number */ + uint32_t dek_index; /* HW TLS context number */ + struct mlx5e_tls *tls; + struct m_snd_tag *rl_tag; + struct mtx mtx; + uint32_t expected_seq; /* expected TCP sequence number */ + uint32_t state; /* see MLX5E_TLS_ST_XXX */ +#define MLX5E_TLS_ST_INIT 0 +#define MLX5E_TLS_ST_SETUP 1 +#define MLX5E_TLS_ST_TXRDY 2 +#define MLX5E_TLS_ST_FREED 3 + struct work_struct work; + + uint32_t dek_index_ok:1; + + /* parameters needed */ + uint8_t crypto_params[128] __aligned(4); +} __aligned(MLX5E_CACHELINE_SIZE); + +#define MLX5E_TLS_STATS(m) \ + m(+1, u64, tx_packets, "tx_packets", "Transmitted packets") \ + m(+1, u64, tx_bytes, "tx_bytes", "Transmitted bytes") \ + m(+1, u64, tx_packets_ooo, "tx_packets_ooo", "Transmitted packets out of order") \ + m(+1, u64, tx_bytes_ooo, "tx_bytes_ooo", "Transmitted bytes out of order") \ + m(+1, u64, tx_error, "tx_error", "Transmitted 
packets with error") + +#define MLX5E_TLS_STATS_NUM (0 MLX5E_TLS_STATS(MLX5E_STATS_COUNT)) + +struct mlx5e_tls_stats { + struct sysctl_ctx_list ctx; + counter_u64_t arg[0]; + MLX5E_TLS_STATS(MLX5E_STATS_COUNTER) +}; + +struct mlx5e_tls { + struct sysctl_ctx_list ctx; + struct mlx5e_tls_stats stats; + struct workqueue_struct *wq; + uma_zone_t zone; + uint32_t max_resources; /* max number of resources */ + volatile uint32_t num_resources; /* current number of resources */ + int init; /* set when ready */ + char zname[32]; +}; + +int mlx5e_tls_init(struct mlx5e_priv *); +void mlx5e_tls_cleanup(struct mlx5e_priv *); +int mlx5e_sq_tls_xmit(struct mlx5e_sq *, struct mlx5e_xmit_args *, struct mbuf **); + +if_snd_tag_alloc_t mlx5e_tls_snd_tag_alloc; +if_snd_tag_modify_t mlx5e_tls_snd_tag_modify; +if_snd_tag_query_t mlx5e_tls_snd_tag_query; +if_snd_tag_free_t mlx5e_tls_snd_tag_free; + +#endif /* _MLX5_TLS_H_ */ diff --git a/sys/dev/mlx5/mlx5_en/en_rl.h b/sys/dev/mlx5/mlx5_en/en_rl.h index 05c3e488b13e..10f9dc691f10 100644 --- a/sys/dev/mlx5/mlx5_en/en_rl.h +++ b/sys/dev/mlx5/mlx5_en/en_rl.h @@ -167,6 +167,7 @@ struct mlx5e_rl_priv_data { int mlx5e_rl_init(struct mlx5e_priv *priv); void mlx5e_rl_cleanup(struct mlx5e_priv *priv); void mlx5e_rl_refresh_sq_inline(struct mlx5e_rl_priv_data *rl); + if_snd_tag_alloc_t mlx5e_rl_snd_tag_alloc; if_snd_tag_modify_t mlx5e_rl_snd_tag_modify; if_snd_tag_query_t mlx5e_rl_snd_tag_query; diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c b/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c index e31117a0e039..7e742fb9ab39 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c @@ -48,6 +48,26 @@ mlx5e_create_stats(struct sysctl_ctx_list *ctx, } } +void +mlx5e_create_counter_stats(struct sysctl_ctx_list *ctx, + struct sysctl_oid_list *parent, const char *buffer, + const char **desc, unsigned num, counter_u64_t *arg) +{ + struct sysctl_oid *node; + unsigned x; + + sysctl_ctx_init(ctx); + + node = 
SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, + buffer, CTLFLAG_RD, NULL, "Statistics"); + if (node == NULL) + return; + for (x = 0; x != num; x++) { + SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, + desc[2 * x], CTLFLAG_RD, arg + x, desc[2 * x + 1]); + } +} + static void mlx5e_ethtool_sync_tx_completion_fact(struct mlx5e_priv *priv) { diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c b/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c new file mode 100644 index 000000000000..70e442489d3b --- /dev/null +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c @@ -0,0 +1,834 @@ +/*- + * Copyright (c) 2019 Mellanox Technologies. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include "opt_kern_tls.h" + +#include "en.h" + +#include + +#include +#include +#include + +#ifdef KERN_TLS + +MALLOC_DEFINE(M_MLX5E_TLS, "MLX5E_TLS", "MLX5 ethernet HW TLS"); + +/* software TLS context */ +struct mlx5_ifc_sw_tls_cntx_bits { + struct mlx5_ifc_tls_static_params_bits param; + struct mlx5_ifc_tls_progress_params_bits progress; + struct { + uint8_t key_data[8][0x20]; + uint8_t key_len[0x20]; + } key; +}; + +CTASSERT(MLX5_ST_SZ_BYTES(sw_tls_cntx) <= sizeof(((struct mlx5e_tls_tag *)0)->crypto_params)); +CTASSERT(MLX5_ST_SZ_BYTES(mkc) == sizeof(((struct mlx5e_tx_umr_wqe *)0)->mkc)); + +static const char *mlx5e_tls_stats_desc[] = { + MLX5E_TLS_STATS(MLX5E_STATS_DESC) +}; + +static void mlx5e_tls_work(struct work_struct *); + +static int +mlx5e_tls_tag_zinit(void *mem, int size, int flags) +{ + struct mlx5e_tls_tag *ptag = mem; + + MPASS(size == sizeof(*ptag)); + + memset(ptag, 0, sizeof(*ptag)); + mtx_init(&ptag->mtx, "mlx5-tls-tag-mtx", NULL, MTX_DEF); + INIT_WORK(&ptag->work, mlx5e_tls_work); + + return (0); +} + +static void +mlx5e_tls_tag_zfini(void *mem, int size) +{ + struct mlx5e_tls_tag *ptag = mem; + struct mlx5e_priv *priv; + struct mlx5e_tls *ptls; + + ptls = ptag->tls; + priv = container_of(ptls, struct mlx5e_priv, tls); + + flush_work(&ptag->work); + + if (ptag->tisn != 0) { + mlx5_tls_close_tis(priv->mdev, ptag->tisn); + atomic_add_32(&ptls->num_resources, -1U); + } + + mtx_destroy(&ptag->mtx); +} + +static void +mlx5e_tls_tag_zfree(struct mlx5e_tls_tag *ptag) +{ + + /* reset some variables */ + ptag->state = MLX5E_TLS_ST_INIT; + ptag->dek_index = 0; + ptag->dek_index_ok = 0; + + /* avoid leaking keys */ + memset(ptag->crypto_params, 0, sizeof(ptag->crypto_params)); + + /* update number of TIS contexts */ + if (ptag->tisn == 0) + atomic_add_32(&ptag->tls->num_resources, -1U); + + /* return tag to UMA */ + uma_zfree(ptag->tls->zone, ptag); +} + +int +mlx5e_tls_init(struct mlx5e_priv *priv) +{ + struct mlx5e_tls *ptls 
= &priv->tls; + struct sysctl_oid *node; + uint32_t x; + + if (MLX5_CAP_GEN(priv->mdev, tls) == 0) + return (0); + + ptls->wq = create_singlethread_workqueue("mlx5-tls-wq"); + if (ptls->wq == NULL) + return (ENOMEM); + + sysctl_ctx_init(&ptls->ctx); + + snprintf(ptls->zname, sizeof(ptls->zname), + "mlx5_%u_tls", device_get_unit(priv->mdev->pdev->dev.bsddev)); + + ptls->zone = uma_zcreate(ptls->zname, sizeof(struct mlx5e_tls_tag), + NULL, NULL, mlx5e_tls_tag_zinit, mlx5e_tls_tag_zfini, UMA_ALIGN_CACHE, 0); + + ptls->max_resources = 1U << MLX5_CAP_GEN(priv->mdev, log_max_dek); + + for (x = 0; x != MLX5E_TLS_STATS_NUM; x++) + ptls->stats.arg[x] = counter_u64_alloc(M_WAITOK); + + ptls->init = 1; + + node = SYSCTL_ADD_NODE(&priv->sysctl_ctx, + SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, + "tls", CTLFLAG_RW, NULL, "Hardware TLS offload"); + if (node == NULL) + return (0); + + mlx5e_create_counter_stats(&ptls->ctx, + SYSCTL_CHILDREN(node), "stats", + mlx5e_tls_stats_desc, MLX5E_TLS_STATS_NUM, + ptls->stats.arg); + + return (0); +} + +void +mlx5e_tls_cleanup(struct mlx5e_priv *priv) +{ + struct mlx5e_tls *ptls = &priv->tls; + uint32_t x; + + if (MLX5_CAP_GEN(priv->mdev, tls) == 0) + return; + + ptls->init = 0; + flush_workqueue(ptls->wq); + sysctl_ctx_free(&ptls->ctx); + uma_zdestroy(ptls->zone); + destroy_workqueue(ptls->wq); + + /* check if all resources are freed */ + MPASS(priv->tls.num_resources == 0); + + for (x = 0; x != MLX5E_TLS_STATS_NUM; x++) + counter_u64_free(ptls->stats.arg[x]); +} + +static void +mlx5e_tls_work(struct work_struct *work) +{ + struct mlx5e_tls_tag *ptag; + struct mlx5e_priv *priv; + int err; + + ptag = container_of(work, struct mlx5e_tls_tag, work); + priv = container_of(ptag->tls, struct mlx5e_priv, tls); + + switch (ptag->state) { + case MLX5E_TLS_ST_SETUP: + /* try to open TIS, if not present */ + if (ptag->tisn == 0) { + err = mlx5_tls_open_tis(priv->mdev, 0, priv->tdn, + priv->pdn, &ptag->tisn); + if (err) { + MLX5E_TLS_STAT_INC(ptag, 
tx_error, 1); + break; + } + } + MLX5_SET(sw_tls_cntx, ptag->crypto_params, progress.pd, ptag->tisn); + + /* try to allocate a DEK context ID */ + err = mlx5_encryption_key_create(priv->mdev, priv->pdn, + MLX5_ADDR_OF(sw_tls_cntx, ptag->crypto_params, key.key_data), + MLX5_GET(sw_tls_cntx, ptag->crypto_params, key.key_len), + &ptag->dek_index); + if (err) { + MLX5E_TLS_STAT_INC(ptag, tx_error, 1); + break; + } + + MLX5_SET(sw_tls_cntx, ptag->crypto_params, param.dek_index, ptag->dek_index); + + ptag->dek_index_ok = 1; + + MLX5E_TLS_TAG_LOCK(ptag); + if (ptag->state == MLX5E_TLS_ST_SETUP) + ptag->state = MLX5E_TLS_ST_TXRDY; + MLX5E_TLS_TAG_UNLOCK(ptag); + break; + + case MLX5E_TLS_ST_FREED: + /* wait for all refs to go away */ + while (ptag->refs != 0) + msleep(1); + + /* try to destroy DEK context by ID */ + if (ptag->dek_index_ok) + err = mlx5_encryption_key_destroy(priv->mdev, ptag->dek_index); + + /* free tag */ + mlx5e_tls_tag_zfree(ptag); + break; + + default: + break; + } +} + +static int +mlx5e_tls_set_params(void *ctx, const struct tls_session_params *en) +{ + + MLX5_SET(sw_tls_cntx, ctx, param.const_2, 2); + if (en->tls_vminor == TLS_MINOR_VER_TWO) + MLX5_SET(sw_tls_cntx, ctx, param.tls_version, 2); /* v1.2 */ + else + MLX5_SET(sw_tls_cntx, ctx, param.tls_version, 3); /* v1.3 */ + MLX5_SET(sw_tls_cntx, ctx, param.const_1, 1); + MLX5_SET(sw_tls_cntx, ctx, param.encryption_standard, 1); /* TLS */ + + /* copy the initial vector in place */ + if (en->iv_len == MLX5_FLD_SZ_BYTES(sw_tls_cntx, param.gcm_iv)) { + memcpy(MLX5_ADDR_OF(sw_tls_cntx, ctx, param.gcm_iv), + en->iv, MLX5_FLD_SZ_BYTES(sw_tls_cntx, param.gcm_iv)); + } else if (en->iv_len == (MLX5_FLD_SZ_BYTES(sw_tls_cntx, param.gcm_iv) + + MLX5_FLD_SZ_BYTES(sw_tls_cntx, param.implicit_iv))) { + memcpy(MLX5_ADDR_OF(sw_tls_cntx, ctx, param.gcm_iv), + (char *)en->iv + MLX5_FLD_SZ_BYTES(sw_tls_cntx, param.implicit_iv), + MLX5_FLD_SZ_BYTES(sw_tls_cntx, param.gcm_iv)); + memcpy(MLX5_ADDR_OF(sw_tls_cntx, ctx, 
param.implicit_iv), + en->iv, + MLX5_FLD_SZ_BYTES(sw_tls_cntx, param.implicit_iv)); + } else { + return (EINVAL); + } + + if (en->cipher_key_len <= MLX5_FLD_SZ_BYTES(sw_tls_cntx, key.key_data)) { + memcpy(MLX5_ADDR_OF(sw_tls_cntx, ctx, key.key_data), + en->cipher_key, en->cipher_key_len); + MLX5_SET(sw_tls_cntx, ctx, key.key_len, en->cipher_key_len); + } else { + return (EINVAL); + } + return (0); +} + +/* Verify zero default */ +CTASSERT(MLX5E_TLS_ST_INIT == 0); + +int +mlx5e_tls_snd_tag_alloc(struct ifnet *ifp, + union if_snd_tag_alloc_params *params, + struct m_snd_tag **ppmt) +{ + struct if_snd_tag_alloc_rate_limit rl_params; + struct mlx5e_priv *priv; + struct mlx5e_tls_tag *ptag; + const struct tls_session_params *en; + int error; + + priv = ifp->if_softc; + + if (priv->tls.init == 0) + return (EOPNOTSUPP); + + /* allocate new tag from zone, if any */ + ptag = uma_zalloc(priv->tls.zone, M_NOWAIT); + if (ptag == NULL) + return (ENOMEM); + + /* sanity check default values */ + MPASS(ptag->state == MLX5E_TLS_ST_INIT); + MPASS(ptag->dek_index == 0); + MPASS(ptag->dek_index_ok == 0); + + /* setup TLS tag */ + ptag->tls = &priv->tls; + ptag->tag.type = params->hdr.type; + + /* check if there is no TIS context */ + if (ptag->tisn == 0) { + uint32_t value; + + value = atomic_fetchadd_32(&priv->tls.num_resources, 1U); + + /* check resource limits */ + if (value >= priv->tls.max_resources) { + error = ENOMEM; + goto failure; + } + } + + en = ¶ms->tls.tls->params; + + /* only TLS v1.2 and v1.3 is currently supported */ + if (en->tls_vmajor != TLS_MAJOR_VER_ONE || + (en->tls_vminor != TLS_MINOR_VER_TWO +#ifdef TLS_MINOR_VER_THREE + && en->tls_vminor != TLS_MINOR_VER_THREE +#endif + )) { + error = EPROTONOSUPPORT; + goto failure; + } + + switch (en->cipher_algorithm) { + case CRYPTO_AES_NIST_GCM_16: + switch (en->cipher_key_len) { + case 128 / 8: + if (en->auth_algorithm != CRYPTO_AES_128_NIST_GMAC) { + error = EINVAL; + goto failure; + } + if (en->tls_vminor == 
TLS_MINOR_VER_TWO) { + if (MLX5_CAP_TLS(priv->mdev, tls_1_2_aes_gcm_128) == 0) { + error = EPROTONOSUPPORT; + goto failure; + } + } else { + if (MLX5_CAP_TLS(priv->mdev, tls_1_3_aes_gcm_128) == 0) { + error = EPROTONOSUPPORT; + goto failure; + } + } + error = mlx5e_tls_set_params(ptag->crypto_params, en); + if (error) + goto failure; + break; + + case 256 / 8: + if (en->auth_algorithm != CRYPTO_AES_256_NIST_GMAC) { + error = EINVAL; + goto failure; + } + if (en->tls_vminor == TLS_MINOR_VER_TWO) { + if (MLX5_CAP_TLS(priv->mdev, tls_1_2_aes_gcm_256) == 0) { + error = EPROTONOSUPPORT; + goto failure; + } + } else { + if (MLX5_CAP_TLS(priv->mdev, tls_1_3_aes_gcm_256) == 0) { + error = EPROTONOSUPPORT; + goto failure; + } + } + error = mlx5e_tls_set_params(ptag->crypto_params, en); + if (error) + goto failure; + break; + + default: + error = EINVAL; + goto failure; + } + break; + default: + error = EPROTONOSUPPORT; + goto failure; + } + + switch (ptag->tag.type) { +#if defined(RATELIMIT) && defined(IF_SND_TAG_TYPE_TLS_RATE_LIMIT) + case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: + memset(&rl_params, 0, sizeof(rl_params)); + rl_params.hdr = params->tls_rate_limit.hdr; + rl_params.hdr.type = IF_SND_TAG_TYPE_RATE_LIMIT; + rl_params.max_rate = params->tls_rate_limit.max_rate; + + error = mlx5e_rl_snd_tag_alloc(ifp, + container_of(&rl_params, union if_snd_tag_alloc_params, rate_limit), + &ptag->rl_tag); + if (error) + goto failure; + break; +#endif + case IF_SND_TAG_TYPE_TLS: + memset(&rl_params, 0, sizeof(rl_params)); + rl_params.hdr = params->tls.hdr; + rl_params.hdr.type = IF_SND_TAG_TYPE_UNLIMITED; + + error = mlx5e_ul_snd_tag_alloc(ifp, + container_of(&rl_params, union if_snd_tag_alloc_params, unlimited), + &ptag->rl_tag); + if (error) + goto failure; + break; + default: + error = EOPNOTSUPP; + goto failure; + } + + /* store pointer to mbuf tag */ + MPASS(ptag->tag.m_snd_tag.refcount == 0); + m_snd_tag_init(&ptag->tag.m_snd_tag, ifp); + *ppmt = &ptag->tag.m_snd_tag; + return (0); 
+ +failure: + mlx5e_tls_tag_zfree(ptag); + return (error); +} + +int +mlx5e_tls_snd_tag_modify(struct m_snd_tag *pmt, union if_snd_tag_modify_params *params) +{ +#if defined(RATELIMIT) && defined(IF_SND_TAG_TYPE_TLS_RATE_LIMIT) + struct if_snd_tag_rate_limit_params rl_params; + int error; +#endif + struct mlx5e_tls_tag *ptag = + container_of(pmt, struct mlx5e_tls_tag, tag.m_snd_tag); + + switch (ptag->tag.type) { +#if defined(RATELIMIT) && defined(IF_SND_TAG_TYPE_TLS_RATE_LIMIT) + case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: + memset(&rl_params, 0, sizeof(rl_params)); + rl_params.max_rate = params->tls_rate_limit.max_rate; + error = mlx5e_rl_snd_tag_modify(ptag->rl_tag, + container_of(&rl_params, union if_snd_tag_modify_params, rate_limit)); + return (error); +#endif + default: + return (EOPNOTSUPP); + } +} + +int +mlx5e_tls_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params) +{ + struct mlx5e_tls_tag *ptag = + container_of(pmt, struct mlx5e_tls_tag, tag.m_snd_tag); + int error; + + switch (ptag->tag.type) { +#if defined(RATELIMIT) && defined(IF_SND_TAG_TYPE_TLS_RATE_LIMIT) + case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: + error = mlx5e_rl_snd_tag_query(ptag->rl_tag, params); + break; +#endif + case IF_SND_TAG_TYPE_TLS: + error = mlx5e_ul_snd_tag_query(ptag->rl_tag, params); + break; + default: + error = EOPNOTSUPP; + break; + } + return (error); +} + +void +mlx5e_tls_snd_tag_free(struct m_snd_tag *pmt) +{ + struct mlx5e_tls_tag *ptag = + container_of(pmt, struct mlx5e_tls_tag, tag.m_snd_tag); + struct mlx5e_priv *priv; + + switch (ptag->tag.type) { +#if defined(RATELIMIT) && defined(IF_SND_TAG_TYPE_TLS_RATE_LIMIT) + case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: + mlx5e_rl_snd_tag_free(ptag->rl_tag); + break; +#endif + case IF_SND_TAG_TYPE_TLS: + mlx5e_ul_snd_tag_free(ptag->rl_tag); + break; + default: + break; + } + + MLX5E_TLS_TAG_LOCK(ptag); + ptag->state = MLX5E_TLS_ST_FREED; + MLX5E_TLS_TAG_UNLOCK(ptag); + + priv = ptag->tag.m_snd_tag.ifp->if_softc; + 
queue_work(priv->tls.wq, &ptag->work); +} + +CTASSERT((MLX5_FLD_SZ_BYTES(sw_tls_cntx, param) % 16) == 0); + +static void +mlx5e_tls_send_static_parameters(struct mlx5e_sq *sq, struct mlx5e_tls_tag *ptag) +{ + const u32 ds_cnt = DIV_ROUND_UP(sizeof(struct mlx5e_tx_umr_wqe) + + MLX5_FLD_SZ_BYTES(sw_tls_cntx, param), MLX5_SEND_WQE_DS); + struct mlx5e_tx_umr_wqe *wqe; + u16 pi; + + pi = sq->pc & sq->wq.sz_m1; + wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); + + memset(wqe, 0, sizeof(*wqe)); + + wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | + MLX5_OPCODE_UMR | (MLX5_OPCODE_MOD_UMR_TLS_TIS_STATIC_PARAMS << 24)); + wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + wqe->ctrl.imm = cpu_to_be32(ptag->tisn << 8); + + if (mlx5e_do_send_cqe(sq)) + wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE | MLX5_FENCE_MODE_INITIATOR_SMALL; + else + wqe->ctrl.fm_ce_se = MLX5_FENCE_MODE_INITIATOR_SMALL; + + /* fill out UMR control segment */ + wqe->umr.flags = 0x80; /* inline data */ + wqe->umr.bsf_octowords = cpu_to_be16(MLX5_FLD_SZ_BYTES(sw_tls_cntx, param) / 16); + + /* copy in the static crypto parameters */ + memcpy(wqe + 1, MLX5_ADDR_OF(sw_tls_cntx, ptag->crypto_params, param), + MLX5_FLD_SZ_BYTES(sw_tls_cntx, param)); + + /* copy data for doorbell */ + memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32)); + + sq->mbuf[pi].mbuf = NULL; + sq->mbuf[pi].num_bytes = 0; + sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); + sq->mbuf[pi].p_refcount = &ptag->refs; + atomic_add_int(&ptag->refs, 1); + sq->pc += sq->mbuf[pi].num_wqebbs; +} + +CTASSERT(MLX5_FLD_SZ_BYTES(sw_tls_cntx, progress) == + sizeof(((struct mlx5e_tx_psv_wqe *)0)->psv)); + +static void +mlx5e_tls_send_progress_parameters(struct mlx5e_sq *sq, struct mlx5e_tls_tag *ptag) +{ + const u32 ds_cnt = DIV_ROUND_UP(sizeof(struct mlx5e_tx_psv_wqe), + MLX5_SEND_WQE_DS); + struct mlx5e_tx_psv_wqe *wqe; + u16 pi; + + pi = sq->pc & sq->wq.sz_m1; + wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); + + 
memset(wqe, 0, sizeof(*wqe)); + + wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | + MLX5_OPCODE_SET_PSV | (MLX5_OPCODE_MOD_PSV_TLS_TIS_PROGRESS_PARAMS << 24)); + wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + + if (mlx5e_do_send_cqe(sq)) + wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE | MLX5_FENCE_MODE_INITIATOR_SMALL; + else + wqe->ctrl.fm_ce_se = MLX5_FENCE_MODE_INITIATOR_SMALL; + + /* copy in the PSV control segment */ + memcpy(&wqe->psv, MLX5_ADDR_OF(sw_tls_cntx, ptag->crypto_params, progress), + sizeof(wqe->psv)); + + /* copy data for doorbell */ + memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32)); + + sq->mbuf[pi].mbuf = NULL; + sq->mbuf[pi].num_bytes = 0; + sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); + sq->mbuf[pi].p_refcount = &ptag->refs; + atomic_add_int(&ptag->refs, 1); + sq->pc += sq->mbuf[pi].num_wqebbs; +} + +static void +mlx5e_tls_send_nop(struct mlx5e_sq *sq, struct mlx5e_tls_tag *ptag) +{ + const u32 ds_cnt = MLX5_SEND_WQEBB_NUM_DS; + struct mlx5e_tx_wqe *wqe; + u16 pi; + + pi = sq->pc & sq->wq.sz_m1; + wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); + + memset(&wqe->ctrl, 0, sizeof(wqe->ctrl)); + + wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP); + wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + if (mlx5e_do_send_cqe(sq)) + wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE | MLX5_FENCE_MODE_INITIATOR_SMALL; + else + wqe->ctrl.fm_ce_se = MLX5_FENCE_MODE_INITIATOR_SMALL; + + /* Copy data for doorbell */ + memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32)); + + sq->mbuf[pi].mbuf = NULL; + sq->mbuf[pi].num_bytes = 0; + sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); + sq->mbuf[pi].p_refcount = &ptag->refs; + atomic_add_int(&ptag->refs, 1); + sq->pc += sq->mbuf[pi].num_wqebbs; +} + +#define SBTLS_MBUF_NO_DATA ((struct mbuf *)1) + +static struct mbuf * +sbtls_recover_record(struct mbuf *mb, int wait, uint32_t tcp_old, uint32_t 
*ptcp_seq) +{ + struct mbuf *mr; + uint32_t offset; + uint32_t delta; + + /* check format of incoming mbuf */ + if (mb->m_next == NULL || + (mb->m_next->m_flags & (M_NOMAP | M_EXT)) != (M_NOMAP | M_EXT) || + mb->m_next->m_ext.ext_buf == NULL) { + mr = NULL; + goto done; + } + + /* get unmapped data offset */ + offset = mtod(mb->m_next, uintptr_t); + + /* check if we don't need to re-transmit anything */ + if (offset == 0) { + mr = SBTLS_MBUF_NO_DATA; + goto done; + } + + /* try to get a new mbufs with packet header */ + mr = m_gethdr(wait, MT_DATA); + if (mr == NULL) + goto done; + + mb_dupcl(mr, mb->m_next); + + /* the beginning of the TLS record */ + mr->m_data = NULL; + + /* setup packet header length */ + mr->m_pkthdr.len = mr->m_len = offset; + + /* check for partial re-transmit */ + delta = *ptcp_seq - tcp_old; + + if (delta < offset) { + m_adj(mr, offset - delta); + offset = delta; + } + + /* + * Rewind the TCP sequence number by the amount of data + * retransmitted: + */ + *ptcp_seq -= offset; +done: + return (mr); +} + +static int +mlx5e_sq_tls_populate(struct mbuf *mb, uint64_t *pseq) +{ + struct mbuf_ext_pgs *ext_pgs; + + for (; mb != NULL; mb = mb->m_next) { + if (!(mb->m_flags & M_NOMAP)) + continue; + ext_pgs = (void *)mb->m_ext.ext_buf; + *pseq = ext_pgs->seqno; + return (1); + } + return (0); +} + +int +mlx5e_sq_tls_xmit(struct mlx5e_sq *sq, struct mlx5e_xmit_args *parg, struct mbuf **ppmb) +{ + struct mlx5e_tls_tag *ptls_tag; + struct mlx5e_snd_tag *ptag; + struct tcphdr *th; + struct mbuf *mb = *ppmb; + u64 rcd_sn; + u32 header_size; + u32 mb_seq; + + if ((mb->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0) + return (MLX5E_TLS_CONTINUE); + + ptag = container_of(mb->m_pkthdr.snd_tag, + struct mlx5e_snd_tag, m_snd_tag); + + if ( +#if defined(RATELIMIT) && defined(IF_SND_TAG_TYPE_TLS_RATE_LIMIT) + ptag->type != IF_SND_TAG_TYPE_TLS_RATE_LIMIT && +#endif + ptag->type != IF_SND_TAG_TYPE_TLS) + return (MLX5E_TLS_CONTINUE); + + ptls_tag = container_of(ptag, 
struct mlx5e_tls_tag, tag); + + header_size = mlx5e_get_full_header_size(mb, &th); + if (unlikely(header_size == 0 || th == NULL)) + return (MLX5E_TLS_FAILURE); + + /* + * Send non-TLS TCP packets AS-IS: + */ + if (header_size == mb->m_pkthdr.len || + mlx5e_sq_tls_populate(mb, &rcd_sn) == 0) { + parg->tisn = 0; + parg->ihs = header_size; + return (MLX5E_TLS_CONTINUE); + } + + mb_seq = ntohl(th->th_seq); + + MLX5E_TLS_TAG_LOCK(ptls_tag); + switch (ptls_tag->state) { + case MLX5E_TLS_ST_INIT: + queue_work(sq->priv->tls.wq, &ptls_tag->work); + ptls_tag->state = MLX5E_TLS_ST_SETUP; + ptls_tag->expected_seq = ~mb_seq; /* force setup */ + MLX5E_TLS_TAG_UNLOCK(ptls_tag); + return (MLX5E_TLS_FAILURE); + + case MLX5E_TLS_ST_SETUP: + MLX5E_TLS_TAG_UNLOCK(ptls_tag); + return (MLX5E_TLS_FAILURE); + + default: + MLX5E_TLS_TAG_UNLOCK(ptls_tag); + break; + } + + if (unlikely(ptls_tag->expected_seq != mb_seq)) { + struct mbuf *r_mb; + uint32_t tcp_seq = mb_seq; + + r_mb = sbtls_recover_record(mb, M_NOWAIT, ptls_tag->expected_seq, &tcp_seq); + if (r_mb == NULL) { + MLX5E_TLS_STAT_INC(ptls_tag, tx_error, 1); + return (MLX5E_TLS_FAILURE); + } + + MLX5E_TLS_STAT_INC(ptls_tag, tx_packets_ooo, 1); + + /* check if this is the first fragment of a TLS record */ + if (r_mb == SBTLS_MBUF_NO_DATA || r_mb->m_data == NULL) { + /* setup TLS static parameters */ + MLX5_SET64(sw_tls_cntx, ptls_tag->crypto_params, + param.initial_record_number, rcd_sn); + + /* setup TLS progress parameters */ + MLX5_SET(sw_tls_cntx, ptls_tag->crypto_params, + progress.next_record_tcp_sn, tcp_seq); + + /* + * NOTE: The sendqueue should have enough room to + * carry both the static and the progress parameters + * when we get here! 
+ */ + mlx5e_tls_send_static_parameters(sq, ptls_tag); + mlx5e_tls_send_progress_parameters(sq, ptls_tag); + + if (r_mb == SBTLS_MBUF_NO_DATA) { + mlx5e_tls_send_nop(sq, ptls_tag); + ptls_tag->expected_seq = mb_seq; + return (MLX5E_TLS_LOOP); + } + } + + MLX5E_TLS_STAT_INC(ptls_tag, tx_bytes_ooo, r_mb->m_pkthdr.len); + + /* setup transmit arguments */ + parg->tisn = ptls_tag->tisn; + parg->pref = &ptls_tag->refs; + + /* try to send DUMP data */ + if (mlx5e_sq_dump_xmit(sq, parg, &r_mb) != 0) { + m_freem(r_mb); + ptls_tag->expected_seq = tcp_seq; + return (MLX5E_TLS_FAILURE); + } else { + ptls_tag->expected_seq = mb_seq; + return (MLX5E_TLS_LOOP); + } + } else { + MLX5E_TLS_STAT_INC(ptls_tag, tx_packets, 1); + MLX5E_TLS_STAT_INC(ptls_tag, tx_bytes, mb->m_pkthdr.len); + } + ptls_tag->expected_seq += mb->m_pkthdr.len - header_size; + + parg->tisn = ptls_tag->tisn; + parg->ihs = header_size; + parg->pref = &ptls_tag->refs; + return (MLX5E_TLS_CONTINUE); +} + +#else + +int +mlx5e_tls_init(struct mlx5e_priv *priv) +{ + + return (0); +} + +void +mlx5e_tls_cleanup(struct mlx5e_priv *priv) +{ + /* NOP */ +} + +#endif /* KERN_TLS */ diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c index 51ffa485f737..2c8b46af12cd 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c @@ -25,6 +25,8 @@ * $FreeBSD$ */ +#include "opt_kern_tls.h" + #include "en.h" #include @@ -1502,6 +1504,10 @@ mlx5e_free_sq_db(struct mlx5e_sq *sq) int x; for (x = 0; x != wq_sz; x++) { + if (unlikely(sq->mbuf[x].p_refcount != NULL)) { + atomic_add_int(sq->mbuf[x].p_refcount, -1); + sq->mbuf[x].p_refcount = NULL; + } if (sq->mbuf[x].mbuf != NULL) { bus_dmamap_unload(sq->dma_tag, sq->mbuf[x].dma_map); m_freem(sq->mbuf[x].mbuf); @@ -3000,6 +3006,11 @@ mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu) } priv->params_ethtool.hw_mtu = hw_mtu; + /* compute MSB */ + while (hw_mtu & (hw_mtu - 1)) + hw_mtu &= (hw_mtu - 1); + 
priv->params_ethtool.hw_mtu_msb = hw_mtu; + return (err); } @@ -3311,6 +3322,10 @@ mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data) } if (mask & IFCAP_NOMAP) ifp->if_capenable ^= IFCAP_NOMAP; + if (mask & IFCAP_TXTLS4) + ifp->if_capenable ^= IFCAP_TXTLS4; + if (mask & IFCAP_TXTLS6) + ifp->if_capenable ^= IFCAP_TXTLS6; if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) @@ -3596,6 +3611,7 @@ mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn, mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, umr_en, 1); /* used by HW TLS */ MLX5_SET(mkc, mkc, lw, 1); MLX5_SET(mkc, mkc, lr, 1); @@ -3991,7 +4007,7 @@ mlx5e_setup_pauseframes(struct mlx5e_priv *priv) PRIV_UNLOCK(priv); } -static int +int mlx5e_ul_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params, struct m_snd_tag **ppmt) @@ -4033,7 +4049,7 @@ mlx5e_ul_snd_tag_alloc(struct ifnet *ifp, } } -static int +int mlx5e_ul_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params) { struct mlx5e_channel *pch = @@ -4044,7 +4060,7 @@ mlx5e_ul_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *par return (0); } -static void +void mlx5e_ul_snd_tag_free(struct m_snd_tag *pmt) { struct mlx5e_channel *pch = @@ -4063,9 +4079,17 @@ mlx5e_snd_tag_alloc(struct ifnet *ifp, #ifdef RATELIMIT case IF_SND_TAG_TYPE_RATE_LIMIT: return (mlx5e_rl_snd_tag_alloc(ifp, params, ppmt)); +#if defined(KERN_TLS) && defined(IF_SND_TAG_TYPE_TLS_RATE_LIMIT) + case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: + return (mlx5e_tls_snd_tag_alloc(ifp, params, ppmt)); +#endif #endif case IF_SND_TAG_TYPE_UNLIMITED: return (mlx5e_ul_snd_tag_alloc(ifp, params, ppmt)); +#ifdef KERN_TLS + case IF_SND_TAG_TYPE_TLS: + return (mlx5e_tls_snd_tag_alloc(ifp, params, ppmt)); +#endif default: return (EOPNOTSUPP); } @@ -4081,8 +4105,15 @@ mlx5e_snd_tag_modify(struct m_snd_tag *pmt, union 
if_snd_tag_modify_params *para #ifdef RATELIMIT case IF_SND_TAG_TYPE_RATE_LIMIT: return (mlx5e_rl_snd_tag_modify(pmt, params)); +#if defined(KERN_TLS) && defined(IF_SND_TAG_TYPE_TLS_RATE_LIMIT) + case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: + return (mlx5e_tls_snd_tag_modify(pmt, params)); +#endif #endif case IF_SND_TAG_TYPE_UNLIMITED: +#ifdef KERN_TLS + case IF_SND_TAG_TYPE_TLS: +#endif default: return (EOPNOTSUPP); } @@ -4098,9 +4129,17 @@ mlx5e_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params #ifdef RATELIMIT case IF_SND_TAG_TYPE_RATE_LIMIT: return (mlx5e_rl_snd_tag_query(pmt, params)); +#if defined(KERN_TLS) && defined(IF_SND_TAG_TYPE_TLS_RATE_LIMIT) + case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: + return (mlx5e_tls_snd_tag_query(pmt, params)); +#endif #endif case IF_SND_TAG_TYPE_UNLIMITED: return (mlx5e_ul_snd_tag_query(pmt, params)); +#ifdef KERN_TLS + case IF_SND_TAG_TYPE_TLS: + return (mlx5e_tls_snd_tag_query(pmt, params)); +#endif default: return (EOPNOTSUPP); } @@ -4161,10 +4200,20 @@ mlx5e_snd_tag_free(struct m_snd_tag *pmt) case IF_SND_TAG_TYPE_RATE_LIMIT: mlx5e_rl_snd_tag_free(pmt); break; +#if defined(KERN_TLS) && defined(IF_SND_TAG_TYPE_TLS_RATE_LIMIT) + case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: + mlx5e_tls_snd_tag_free(pmt); + break; +#endif #endif case IF_SND_TAG_TYPE_UNLIMITED: mlx5e_ul_snd_tag_free(pmt); break; +#ifdef KERN_TLS + case IF_SND_TAG_TYPE_TLS: + mlx5e_tls_snd_tag_free(pmt); + break; +#endif default: break; } @@ -4232,6 +4281,7 @@ mlx5e_create_ifp(struct mlx5_core_dev *mdev) ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO; ifp->if_capabilities |= IFCAP_HWSTATS | IFCAP_HWRXTSTMP; ifp->if_capabilities |= IFCAP_NOMAP; + ifp->if_capabilities |= IFCAP_TXTLS4 | IFCAP_TXTLS6; ifp->if_capabilities |= IFCAP_TXRTLMT; ifp->if_snd_tag_alloc = mlx5e_snd_tag_alloc; ifp->if_snd_tag_free = mlx5e_snd_tag_free; @@ -4317,13 +4367,18 @@ mlx5e_create_ifp(struct mlx5_core_dev *mdev) random_ether_addr(dev_addr); mlx5_en_err(ifp, "Assigned random 
MAC address\n"); } -#ifdef RATELIMIT + err = mlx5e_rl_init(priv); if (err) { mlx5_en_err(ifp, "mlx5e_rl_init failed, %d\n", err); goto err_create_mkey; } -#endif + + err = mlx5e_tls_init(priv); + if (err) { + if_printf(ifp, "%s: mlx5e_tls_init failed\n", __func__); + goto err_rl_init; + } /* set default MTU */ mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu); @@ -4424,10 +4479,12 @@ mlx5e_create_ifp(struct mlx5_core_dev *mdev) return (priv); -#ifdef RATELIMIT +err_rl_init: + mlx5e_rl_cleanup(priv); + err_create_mkey: mlx5_core_destroy_mkey(priv->mdev, &priv->mr); -#endif + err_dealloc_transport_domain: mlx5_dealloc_transport_domain(mdev, priv->tdn); @@ -4504,9 +4561,9 @@ mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv) ifmedia_removeall(&priv->media); ether_ifdetach(ifp); -#ifdef RATELIMIT + mlx5e_tls_cleanup(priv); mlx5e_rl_cleanup(priv); -#endif + /* destroy all remaining sysctl nodes */ sysctl_ctx_free(&priv->stats.vport.ctx); sysctl_ctx_free(&priv->stats.pport.ctx); diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c index 56c578273304..be70af01a6d4 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c @@ -1573,4 +1573,19 @@ mlx5e_rl_sysctl_add_stats_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x, CTLFLAG_RD, &rl->stats.arg[x], 0, desc); } -#endif +#else + +int +mlx5e_rl_init(struct mlx5e_priv *priv) +{ + + return (0); +} + +void +mlx5e_rl_cleanup(struct mlx5e_priv *priv) +{ + /* NOP */ +} + +#endif /* RATELIMIT */ diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c index ae84880f8e1a..3d287d12caa7 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015-2019 Mellanox Technologies. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -25,19 +25,28 @@ * $FreeBSD$ */ +#include "opt_kern_tls.h" + #include "en.h" #include static inline bool -mlx5e_do_send_cqe(struct mlx5e_sq *sq) +mlx5e_do_send_cqe_inline(struct mlx5e_sq *sq) { sq->cev_counter++; /* interleave the CQEs */ if (sq->cev_counter >= sq->cev_factor) { sq->cev_counter = 0; - return (1); + return (true); } - return (0); + return (false); +} + +bool +mlx5e_do_send_cqe(struct mlx5e_sq *sq) +{ + + return (mlx5e_do_send_cqe_inline(sq)); } void @@ -50,7 +59,7 @@ mlx5e_send_nop(struct mlx5e_sq *sq, u32 ds_cnt) wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP); wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - if (mlx5e_do_send_cqe(sq)) + if (mlx5e_do_send_cqe_inline(sq)) wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; else wqe->ctrl.fm_ce_se = 0; @@ -80,12 +89,17 @@ SYSINIT(mlx5e_hash_init, SI_SUB_RANDOM, SI_ORDER_ANY, &mlx5e_hash_init, NULL); static struct mlx5e_sq * mlx5e_select_queue_by_send_tag(struct ifnet *ifp, struct mbuf *mb) { + struct m_snd_tag *mb_tag; struct mlx5e_snd_tag *ptag; struct mlx5e_sq *sq; + mb_tag = mb->m_pkthdr.snd_tag; + +#ifdef KERN_TLS +top: +#endif /* get pointer to sendqueue */ - ptag = container_of(mb->m_pkthdr.snd_tag, - struct mlx5e_snd_tag, m_snd_tag); + ptag = container_of(mb_tag, struct mlx5e_snd_tag, m_snd_tag); switch (ptag->type) { #ifdef RATELIMIT @@ -93,6 +107,11 @@ mlx5e_select_queue_by_send_tag(struct ifnet *ifp, struct mbuf *mb) sq = container_of(ptag, struct mlx5e_rl_channel, tag)->sq; break; +#if defined(KERN_TLS) && defined(IF_SND_TAG_TYPE_TLS_RATE_LIMIT) + case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: + mb_tag = container_of(ptag, struct mlx5e_tls_tag, tag)->rl_tag; + goto top; +#endif #endif case IF_SND_TAG_TYPE_UNLIMITED: sq = &container_of(ptag, @@ -100,6 +119,11 @@ mlx5e_select_queue_by_send_tag(struct ifnet *ifp, struct mbuf *mb) 
KASSERT((ptag->m_snd_tag.refcount > 0), ("mlx5e_select_queue: Channel refs are zero for unlimited tag")); break; +#ifdef KERN_TLS + case IF_SND_TAG_TYPE_TLS: + mb_tag = container_of(ptag, struct mlx5e_tls_tag, tag)->rl_tag; + goto top; +#endif default: sq = NULL; break; @@ -208,8 +232,20 @@ max_inline: return (MIN(mb->m_pkthdr.len, sq->max_inline)); } -static int -mlx5e_get_full_header_size(struct mbuf *mb) +/* + * This function parses IPv4 and IPv6 packets looking for TCP and UDP + * headers. + * + * Upon return, the pointer at which the "ppth" argument points is set + * to the location of the TCP header. NULL is used if no TCP header is + * present. + * + * The return value indicates the number of bytes from the beginning + * of the packet until the first byte after the TCP or UDP header. If + * this function returns zero, the parsing failed. + */ +int +mlx5e_get_full_header_size(struct mbuf *mb, struct tcphdr **ppth) { struct ether_vlan_header *eh; struct tcphdr *th; @@ -221,84 +257,241 @@ mlx5e_get_full_header_size(struct mbuf *mb) eh = mtod(mb, struct ether_vlan_header *); if (mb->m_len < ETHER_HDR_LEN) - return (0); + goto failure; if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { if (mb->m_len < (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)) - return (0); + goto failure; eth_type = ntohs(eh->evl_proto); eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { eth_type = ntohs(eh->evl_encap_proto); eth_hdr_len = ETHER_HDR_LEN; } + switch (eth_type) { case ETHERTYPE_IP: ip = (struct ip *)(mb->m_data + eth_hdr_len); if (mb->m_len < eth_hdr_len + sizeof(*ip)) - return (0); + goto failure; switch (ip->ip_p) { case IPPROTO_TCP: ip_hlen = ip->ip_hl << 2; eth_hdr_len += ip_hlen; - break; + goto tcp_packet; case IPPROTO_UDP: ip_hlen = ip->ip_hl << 2; eth_hdr_len += ip_hlen + 8; - goto done; + th = NULL; + goto udp_packet; default: - return (0); + goto failure; } break; case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(mb->m_data + eth_hdr_len); if (mb->m_len < eth_hdr_len
+ sizeof(*ip6)) - return (0); + goto failure; switch (ip6->ip6_nxt) { case IPPROTO_TCP: eth_hdr_len += sizeof(*ip6); - break; + goto tcp_packet; case IPPROTO_UDP: eth_hdr_len += sizeof(*ip6) + 8; - goto done; + th = NULL; + goto udp_packet; default: - return (0); + goto failure; } break; default: - return (0); + goto failure; } +tcp_packet: if (mb->m_len < eth_hdr_len + sizeof(*th)) - return (0); + goto failure; th = (struct tcphdr *)(mb->m_data + eth_hdr_len); tcp_hlen = th->th_off << 2; eth_hdr_len += tcp_hlen; -done: +udp_packet: /* * m_copydata() will be used on the remaining header which * does not need to reside within the first m_len bytes of * data: */ if (mb->m_pkthdr.len < eth_hdr_len) - return (0); + goto failure; + if (ppth != NULL) + *ppth = th; return (eth_hdr_len); +failure: + if (ppth != NULL) + *ppth = NULL; + return (0); } -static int +struct mlx5_wqe_dump_seg { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_data_seg data; +} __aligned(MLX5_SEND_WQE_BB); + +CTASSERT(DIV_ROUND_UP(2, MLX5_SEND_WQEBB_NUM_DS) == 1); + +int +mlx5e_sq_dump_xmit(struct mlx5e_sq *sq, struct mlx5e_xmit_args *parg, struct mbuf **mbp) +{ + bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS]; + struct mlx5_wqe_dump_seg *wqe; + struct mlx5_wqe_dump_seg *wqe_last; + int nsegs; + int xsegs; + u32 off; + u32 msb; + int err; + int x; + struct mbuf *mb; + const u32 ds_cnt = 2; + u16 pi; + const u8 opcode = MLX5_OPCODE_DUMP; + + /* get pointer to mbuf */ + mb = *mbp; + + /* get producer index */ + pi = sq->pc & sq->wq.sz_m1; + + sq->mbuf[pi].num_bytes = mb->m_pkthdr.len; + sq->mbuf[pi].num_wqebbs = 0; + + /* check number of segments in mbuf */ + err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map, + mb, segs, &nsegs, BUS_DMA_NOWAIT); + if (err == EFBIG) { + /* update statistics */ + sq->stats.defragged++; + /* too many mbuf fragments */ + mb = m_defrag(*mbp, M_NOWAIT); + if (mb == NULL) { + mb = *mbp; + goto tx_drop; + } + /* try again */ + err = 
bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map, + mb, segs, &nsegs, BUS_DMA_NOWAIT); + } + + if (err != 0) + goto tx_drop; + + /* make sure all mbuf data, if any, is visible to the bus */ + bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map, + BUS_DMASYNC_PREWRITE); + + /* compute number of real DUMP segments */ + msb = sq->priv->params_ethtool.hw_mtu_msb; + for (x = xsegs = 0; x != nsegs; x++) + xsegs += howmany((u32)segs[x].ds_len, msb); + + /* check if there are no segments */ + if (unlikely(xsegs == 0)) { + bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map); + m_freem(mb); + *mbp = NULL; /* safety clear */ + return (0); + } + + /* return ENOBUFS if the queue is full */ + if (unlikely(!mlx5e_sq_has_room_for(sq, xsegs))) { + bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map); + m_freem(mb); + *mbp = NULL; /* safety clear */ + return (ENOBUFS); + } + + wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); + wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, sq->wq.sz_m1); + + for (x = 0; x != nsegs; x++) { + for (off = 0; off < segs[x].ds_len; off += msb) { + u32 len = segs[x].ds_len - off; + + /* limit length */ + if (likely(len > msb)) + len = msb; + + memset(&wqe->ctrl, 0, sizeof(wqe->ctrl)); + + /* fill control segment */ + wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); + wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + wqe->ctrl.imm = cpu_to_be32(parg->tisn << 8); + + /* fill data segment */ + wqe->data.addr = cpu_to_be64((uint64_t)segs[x].ds_addr + off); + wqe->data.lkey = sq->mkey_be; + wqe->data.byte_count = cpu_to_be32(len); + + /* advance to next building block */ + if (unlikely(wqe == wqe_last)) + wqe = mlx5_wq_cyc_get_wqe(&sq->wq, 0); + else + wqe++; + + sq->mbuf[pi].num_wqebbs++; + sq->pc++; + } + } + + wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); + wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, (sq->pc - 1) & sq->wq.sz_m1); + + /* put in place data fence */ + wqe->ctrl.fm_ce_se |= MLX5_FENCE_MODE_INITIATOR_SMALL; + + /* check if we should 
generate a completion event */ + if (mlx5e_do_send_cqe_inline(sq)) + wqe_last->ctrl.fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE; + + /* copy data for doorbell */ + memcpy(sq->doorbell.d32, wqe_last, sizeof(sq->doorbell.d32)); + + /* store pointer to mbuf */ + sq->mbuf[pi].mbuf = mb; + sq->mbuf[pi].p_refcount = parg->pref; + atomic_add_int(parg->pref, 1); + + /* count all traffic going out */ + sq->stats.packets++; + sq->stats.bytes += sq->mbuf[pi].num_bytes; + + *mbp = NULL; /* safety clear */ + return (0); + +tx_drop: + sq->stats.dropped++; + *mbp = NULL; + m_freem(mb); + return err; +} + +int mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp) { bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS]; + struct mlx5e_xmit_args args = {}; struct mlx5_wqe_data_seg *dseg; struct mlx5e_tx_wqe *wqe; struct ifnet *ifp; int nsegs; int err; int x; - struct mbuf *mb = *mbp; + struct mbuf *mb; u16 ds_cnt; - u16 ihs; u16 pi; u8 opcode; +#ifdef KERN_TLS +top: +#endif /* Return ENOBUFS if the queue is full */ if (unlikely(!mlx5e_sq_has_room_for(sq, 2 * MLX5_SEND_WQE_MAX_WQEBBS))) return (ENOBUFS); @@ -313,6 +506,23 @@ mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp) return (ENOMEM); } +#ifdef KERN_TLS + /* Special handling for TLS packets, if any */ + switch (mlx5e_sq_tls_xmit(sq, &args, mbp)) { + case MLX5E_TLS_LOOP: + goto top; + case MLX5E_TLS_FAILURE: + mb = *mbp; + err = ENOMEM; + goto tx_drop; + case MLX5E_TLS_DEFERRED: + return (0); + case MLX5E_TLS_CONTINUE: + default: + break; + } +#endif + /* Setup local variables */ pi = sq->pc & sq->wq.sz_m1; wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); @@ -320,6 +530,9 @@ mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp) memset(wqe, 0, sizeof(*wqe)); + /* get pointer to mbuf */ + mb = *mbp; + /* Send a copy of the frame to the BPF listener, if any */ if (ifp != NULL && ifp->if_bpf != NULL) ETHER_BPF_MTAP(ifp, mb); @@ -340,59 +553,62 @@ mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp) wqe->eth.mss = cpu_to_be16(mss); opcode = 
MLX5_OPCODE_LSO; - ihs = mlx5e_get_full_header_size(mb); - if (unlikely(ihs == 0)) { + if (args.ihs == 0) + args.ihs = mlx5e_get_full_header_size(mb, NULL); + if (unlikely(args.ihs == 0)) { err = EINVAL; goto tx_drop; } - payload_len = mb->m_pkthdr.len - ihs; + payload_len = mb->m_pkthdr.len - args.ihs; if (payload_len == 0) num_pkts = 1; else num_pkts = DIV_ROUND_UP(payload_len, mss); - sq->mbuf[pi].num_bytes = payload_len + (num_pkts * ihs); + sq->mbuf[pi].num_bytes = payload_len + (num_pkts * args.ihs); sq->stats.tso_packets++; sq->stats.tso_bytes += payload_len; } else { opcode = MLX5_OPCODE_SEND; - switch (sq->min_inline_mode) { - case MLX5_INLINE_MODE_IP: - case MLX5_INLINE_MODE_TCP_UDP: - ihs = mlx5e_get_full_header_size(mb); - if (unlikely(ihs == 0)) - ihs = mlx5e_get_l2_header_size(sq, mb); - break; - case MLX5_INLINE_MODE_L2: - ihs = mlx5e_get_l2_header_size(sq, mb); - break; - case MLX5_INLINE_MODE_NONE: - /* FALLTHROUGH */ - default: - if ((mb->m_flags & M_VLANTAG) != 0 && - (sq->min_insert_caps & MLX5E_INSERT_VLAN) != 0) { - /* inlining VLAN data is not required */ - wqe->eth.vlan_cmd = htons(0x8000); /* bit 0 CVLAN */ - wqe->eth.vlan_hdr = htons(mb->m_pkthdr.ether_vtag); - ihs = 0; - } else if ((mb->m_flags & M_VLANTAG) == 0 && - (sq->min_insert_caps & MLX5E_INSERT_NON_VLAN) != 0) { - /* inlining non-VLAN data is not required */ - ihs = 0; - } else { - /* we are forced to inlining L2 header, if any */ - ihs = mlx5e_get_l2_header_size(sq, mb); + if (args.ihs == 0) { + switch (sq->min_inline_mode) { + case MLX5_INLINE_MODE_IP: + case MLX5_INLINE_MODE_TCP_UDP: + args.ihs = mlx5e_get_full_header_size(mb, NULL); + if (unlikely(args.ihs == 0)) + args.ihs = mlx5e_get_l2_header_size(sq, mb); + break; + case MLX5_INLINE_MODE_L2: + args.ihs = mlx5e_get_l2_header_size(sq, mb); + break; + case MLX5_INLINE_MODE_NONE: + /* FALLTHROUGH */ + default: + if ((mb->m_flags & M_VLANTAG) != 0 && + (sq->min_insert_caps & MLX5E_INSERT_VLAN) != 0) { + /* inlining VLAN data is 
not required */ + wqe->eth.vlan_cmd = htons(0x8000); /* bit 0 CVLAN */ + wqe->eth.vlan_hdr = htons(mb->m_pkthdr.ether_vtag); + args.ihs = 0; + } else if ((mb->m_flags & M_VLANTAG) == 0 && + (sq->min_insert_caps & MLX5E_INSERT_NON_VLAN) != 0) { + /* inlining non-VLAN data is not required */ + args.ihs = 0; + } else { + /* we are forced to inlining L2 header, if any */ + args.ihs = mlx5e_get_l2_header_size(sq, mb); + } + break; } - break; } sq->mbuf[pi].num_bytes = max_t (unsigned int, mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN); } - if (likely(ihs == 0)) { + if (likely(args.ihs == 0)) { /* nothing to inline */ - } else if (unlikely(ihs > sq->max_inline)) { + } else if (unlikely(args.ihs > sq->max_inline)) { /* inline header size is too big */ err = EINVAL; goto tx_drop; @@ -401,9 +617,9 @@ mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp) wqe->eth.inline_hdr_start; /* Range checks */ - if (unlikely(ihs > (MLX5E_MAX_TX_INLINE - ETHER_VLAN_ENCAP_LEN))) - ihs = (MLX5E_MAX_TX_INLINE - ETHER_VLAN_ENCAP_LEN); - else if (unlikely(ihs < ETHER_HDR_LEN)) { + if (unlikely(args.ihs > (MLX5E_MAX_TX_INLINE - ETHER_VLAN_ENCAP_LEN))) + args.ihs = (MLX5E_MAX_TX_INLINE - ETHER_VLAN_ENCAP_LEN); + else if (unlikely(args.ihs < ETHER_HDR_LEN)) { err = EINVAL; goto tx_drop; } @@ -414,20 +630,20 @@ mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp) eh->evl_encap_proto = htons(ETHERTYPE_VLAN); eh->evl_tag = htons(mb->m_pkthdr.ether_vtag); /* Copy rest of header data, if any */ - m_copydata(mb, 0, ihs - ETHER_HDR_LEN, (caddr_t)(eh + 1)); - m_adj(mb, ihs - ETHER_HDR_LEN); + m_copydata(mb, 0, args.ihs - ETHER_HDR_LEN, (caddr_t)(eh + 1)); + m_adj(mb, args.ihs - ETHER_HDR_LEN); /* Extend header by 4 bytes */ - ihs += ETHER_VLAN_ENCAP_LEN; - wqe->eth.inline_hdr_sz = cpu_to_be16(ihs); + args.ihs += ETHER_VLAN_ENCAP_LEN; + wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs); } else { - m_copydata(mb, 0, ihs, wqe->eth.inline_hdr_start); - m_adj(mb, ihs); - wqe->eth.inline_hdr_sz = 
cpu_to_be16(ihs); + m_copydata(mb, 0, args.ihs, wqe->eth.inline_hdr_start); + m_adj(mb, args.ihs); + wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs); } ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; - if (ihs > sizeof(wqe->eth.inline_hdr_start)) { - ds_cnt += DIV_ROUND_UP(ihs - sizeof(wqe->eth.inline_hdr_start), + if (args.ihs > sizeof(wqe->eth.inline_hdr_start)) { + ds_cnt += DIV_ROUND_UP(args.ihs - sizeof(wqe->eth.inline_hdr_start), MLX5_SEND_WQE_DS); } dseg = ((struct mlx5_wqe_data_seg *)&wqe->ctrl) + ds_cnt; @@ -451,7 +667,7 @@ mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp) if (err != 0) goto tx_drop; - /* Make sure all mbuf data, if any, is written to RAM */ + /* Make sure all mbuf data, if any, is visible to the bus */ if (nsegs != 0) { bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map, BUS_DMASYNC_PREWRITE); @@ -475,7 +691,9 @@ mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp) wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - if (mlx5e_do_send_cqe(sq)) + wqe->ctrl.imm = cpu_to_be32(args.tisn << 8); + + if (mlx5e_do_send_cqe_inline(sq)) wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; else wqe->ctrl.fm_ce_se = 0; @@ -486,6 +704,9 @@ mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp) /* Store pointer to mbuf */ sq->mbuf[pi].mbuf = mb; sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); + sq->mbuf[pi].p_refcount = args.pref; + if (unlikely(args.pref != NULL)) + atomic_add_int(args.pref, 1); sq->pc += sq->mbuf[pi].num_wqebbs; /* Count all traffic going out */ @@ -533,6 +754,11 @@ mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget) mb = sq->mbuf[ci].mbuf; sq->mbuf[ci].mbuf = NULL; + if (unlikely(sq->mbuf[ci].p_refcount != NULL)) { + atomic_add_int(sq->mbuf[ci].p_refcount, -1); + sq->mbuf[ci].p_refcount = NULL; + } + if (mb == NULL) { if (sq->mbuf[ci].num_bytes == 0) { /* NOP */ diff --git a/sys/modules/mlx5en/Makefile b/sys/modules/mlx5en/Makefile index 
c49f7d5894a5..ac8c319088ea 100644 --- a/sys/modules/mlx5en/Makefile +++ b/sys/modules/mlx5en/Makefile @@ -8,12 +8,14 @@ mlx5_en_ethtool.c \ mlx5_en_main.c \ mlx5_en_tx.c \ mlx5_en_flow_table.c \ +mlx5_en_hw_tls.c \ mlx5_en_rx.c \ mlx5_en_rl.c \ mlx5_en_txrx.c \ mlx5_en_port_buffer.c \ device_if.h bus_if.h vnode_if.h pci_if.h \ -opt_inet.h opt_inet6.h opt_rss.h opt_ratelimit.h +opt_inet.h opt_inet6.h opt_rss.h opt_ratelimit.h \ +opt_kern_tls.h .if defined(HAVE_PER_CQ_EVENT_PACKET) CFLAGS+= -DHAVE_PER_CQ_EVENT_PACKET