John Baldwin 3c0e568505 Add support for KTLS RX via software decryption.
Allow TLS records to be decrypted in the kernel after being received
by a NIC.  At a high level this is somewhat similar to software KTLS
for the transmit path except in reverse.  Protocols enqueue mbufs
containing encrypted TLS records (or portions of records) into the
tail of a socket buffer and the KTLS layer decrypts those records
before returning them to userland applications.  However, there is an
important difference:

- In the transmit case, the socket buffer is always a single "record"
  holding a chain of mbufs.  Not-yet-encrypted mbufs are marked not
  ready (M_NOTREADY) and released to protocols for transmit by marking
  mbufs ready once their data is encrypted.

- In the receive case, incoming (encrypted) data appended to the
  socket buffer is still a single stream of data from the protocol,
  but decrypted TLS records are stored as separate records in the
  socket buffer and read individually via recvmsg().

Initially I tried to make this work by marking incoming mbufs as
M_NOTREADY, but there didn't seem to be a non-gross way to deal with
picking a portion of the mbuf chain and turning it into a new record
in the socket buffer after decrypting the TLS record it contained
(along with prepending a control message).  Also, such mbufs would
need to be "pinned" in some way while they are being decrypted
such that a concurrent sbcut() wouldn't free them out from under the
thread performing decryption.

As such, I settled on the following solution:

- Socket buffers now contain an additional chain of mbufs (sb_mtls,
  sb_mtlstail, and sb_tlscc) containing encrypted mbufs appended by
  the protocol layer.  These mbufs are still marked M_NOTREADY, but
  soreceive*() generally don't know about them (except that they will
  block waiting for data to be decrypted for a blocking read).

- Each time a new mbuf is appended to this TLS mbuf chain, the socket
  buffer peeks at the TLS record header at the head of the chain to
  determine the encrypted record's length.  If enough data is queued
  for the TLS record, the socket is placed on a per-CPU TLS workqueue
  (reusing the existing KTLS workqueues and worker threads).

- The worker thread loops over the TLS mbuf chain decrypting records
  until it runs out of data.  Each record is detached from the TLS
  mbuf chain while it is being decrypted to keep the mbufs "pinned".
  However, a new sb_dtlscc field tracks the character count of the
  detached record and sbcut()/sbdrop() is updated to account for the
  detached record.  After the record is decrypted, the worker thread
  first checks to see if sbcut() dropped the record.  If so, it is
  freed (can happen when a socket is closed with pending data).
  Otherwise, the header and trailer are stripped from the original
  mbufs, a control message is created holding the decrypted TLS
  header, and the decrypted TLS record is appended to the "normal"
  socket buffer chain.

(Side note: the SBCHECK() infrastructure was very useful as I was
 able to add assertions there about the TLS chain that caught several
 bugs during development.)

Tested by:	rmacklem (various versions)
Relnotes:	yes
Sponsored by:	Chelsio Communications
Differential Revision:	https://reviews.freebsd.org/D24628
2020-07-23 23:48:18 +00:00

245 lines
6.4 KiB
C

/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2014-2019 Netflix Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SYS_KTLS_H_
#define _SYS_KTLS_H_
#include <sys/refcount.h>
#include <sys/_task.h>
/*
 * On-the-wire TLS record header.  Packed so it can overlay the raw
 * bytes at the head of a record.
 */
struct tls_record_layer {
uint8_t tls_type;	/* Record content type (e.g. TLS_RLTYPE_APP). */
uint8_t tls_vmajor;	/* Protocol major version. */
uint8_t tls_vminor;	/* Protocol minor version. */
uint16_t tls_length;	/* Payload length; on the wire this is big-endian. */
uint8_t tls_data[0];	/* Start of the record payload. */
} __attribute__ ((packed));
/* Maximum record payload (2^14) for TLS 1.0 through 1.2. */
#define TLS_MAX_MSG_SIZE_V10_2 16384
#define TLS_MAX_PARAM_SIZE 1024 /* Max key/mac/iv in sockopt */
/* Length of the implicit (salt) portion of an AES-GCM nonce (TLS 1.2). */
#define TLS_AEAD_GCM_LEN 4
/* Full GCM IV length for TLS 1.3. */
#define TLS_1_3_GCM_IV_LEN 12
/* Largest implicit IV stored for CBC cipher suites; sizes tls_session_params.iv. */
#define TLS_CBC_IMPLICIT_IV_LEN 16
/* Type values for the record layer */
#define TLS_RLTYPE_APP 23
/*
* Nonce for GCM for TLS 1.2 per RFC 5288.
*/
struct tls_nonce_data {
uint8_t fixed[TLS_AEAD_GCM_LEN];	/* Implicit (salt) part of the nonce. */
uint64_t seq;	/* Explicit part: the record sequence number. */
} __packed;
/*
* AEAD additional data format for TLS 1.2 per RFC 5246.
*/
struct tls_aead_data {
uint64_t seq; /* In network order */
uint8_t type;	/* Record content type. */
uint8_t tls_vmajor;	/* Protocol major version. */
uint8_t tls_vminor;	/* Protocol minor version. */
uint16_t tls_length;	/* Plaintext length covered by the AEAD. */
} __packed;
/*
* AEAD additional data format for TLS 1.3 per RFC 8446.
*/
struct tls_aead_data_13 {
uint8_t type;	/* Outer record content type. */
uint8_t tls_vmajor;	/* Protocol major version. */
uint8_t tls_vminor;	/* Protocol minor version. */
uint16_t tls_length;	/* Ciphertext length; TLS 1.3 AAD omits the seqno. */
} __packed;
/*
* Stream Cipher MAC additional data input. This does not match the
* exact data on the wire (the sequence number is not placed on the
* wire, and any explicit IV after the record header is not covered by
* the MAC).
*/
struct tls_mac_data {
uint64_t seq;	/* Record sequence number (not sent on the wire). */
uint8_t type;	/* Record content type. */
uint8_t tls_vmajor;	/* Protocol major version. */
uint8_t tls_vminor;	/* Protocol minor version. */
uint16_t tls_length;	/* Length of the data covered by the MAC. */
} __packed;
/* All TLS versions share major version 3 (inherited from SSLv3). */
#define TLS_MAJOR_VER_ONE 3
#define TLS_MINOR_VER_ZERO 1 /* 3, 1 */	/* TLS 1.0 */
#define TLS_MINOR_VER_ONE 2 /* 3, 2 */	/* TLS 1.1 */
#define TLS_MINOR_VER_TWO 3 /* 3, 3 */	/* TLS 1.2 */
#define TLS_MINOR_VER_THREE 4 /* 3, 4 */	/* TLS 1.3 */
/* For TCP_TXTLS_ENABLE and TCP_RXTLS_ENABLE. */
#ifdef _KERNEL
/*
 * Legacy (version 0) layout of the TCP_TXTLS_ENABLE sockopt argument.
 * Identical to struct tls_enable below except that it lacks the
 * trailing rec_seq[] field; kept for binary compatibility with older
 * consumers.  Kernel-only.
 */
struct tls_enable_v0 {
const uint8_t *cipher_key;
const uint8_t *iv; /* Implicit IV. */
const uint8_t *auth_key;
int cipher_algorithm; /* e.g. CRYPTO_AES_CBC */
int cipher_key_len;
int iv_len;
int auth_algorithm; /* e.g. CRYPTO_SHA2_256_HMAC */
int auth_key_len;
int flags;
uint8_t tls_vmajor;
uint8_t tls_vminor;
};
#endif
/*
 * Userland-visible argument for the TCP_TXTLS_ENABLE and
 * TCP_RXTLS_ENABLE socket options.
 */
struct tls_enable {
const uint8_t *cipher_key;
const uint8_t *iv; /* Implicit IV. */
const uint8_t *auth_key;
int cipher_algorithm; /* e.g. CRYPTO_AES_CBC */
int cipher_key_len;
int iv_len;
int auth_algorithm; /* e.g. CRYPTO_SHA2_256_HMAC */
int auth_key_len;
int flags;
uint8_t tls_vmajor;
uint8_t tls_vminor;
/* presumably the initial record sequence number — confirm with callers */
uint8_t rec_seq[8];
};
/* Structure for TLS_GET_RECORD. */
/* Structure for TLS_GET_RECORD. */
/*
 * Per the RX design notes above, a copy of the decrypted record's TLS
 * header is delivered to userland as a control message alongside each
 * decrypted record read via recvmsg().
 */
struct tls_get_record {
/* TLS record header. */
uint8_t tls_type;
uint8_t tls_vmajor;
uint8_t tls_vminor;
uint16_t tls_length;
};
#ifdef _KERNEL
/*
 * Kernel-internal copy of a session's keys and cipher parameters,
 * built from struct tls_enable.
 */
struct tls_session_params {
uint8_t *cipher_key;
uint8_t *auth_key;
/* Implicit IV, sized for the largest case (CBC). */
uint8_t iv[TLS_CBC_IMPLICIT_IV_LEN];
int cipher_algorithm;
int auth_algorithm;
uint16_t cipher_key_len;
uint16_t iv_len;
uint16_t auth_key_len;
uint16_t max_frame_len;
uint8_t tls_vmajor;
uint8_t tls_vminor;
uint8_t tls_hlen;	/* presumably per-record header length — confirm */
uint8_t tls_tlen;	/* presumably per-record trailer length — confirm */
uint8_t tls_bs;		/* presumably cipher block size — confirm */
uint8_t flags;
};
/* Used in APIs to request RX vs TX sessions. */
#define KTLS_TX 1	/* Transmit (encrypting) session. */
#define KTLS_RX 2	/* Receive (decrypting) session. */
/* Version negotiated with crypto backends via ktls_crypto_backend.api_version. */
#define KTLS_API_VERSION 7
/* Forward declarations; only pointers to these are used below. */
struct iovec;
struct ktls_session;
struct m_snd_tag;
struct mbuf;
struct sockbuf;
struct socket;
/*
 * A registered software crypto provider.  Backends are kept on a list
 * (see ktls_crypto_backend_register/deregister below); "try" is invoked
 * to offer a new session to the backend for the given direction
 * (KTLS_TX or KTLS_RX).
 */
struct ktls_crypto_backend {
LIST_ENTRY(ktls_crypto_backend) next;
int (*try)(struct socket *so, struct ktls_session *tls, int direction);
int prio;		/* presumably selection priority — confirm */
int api_version;	/* Must match KTLS_API_VERSION. */
int use_count;		/* Number of sessions using this backend. */
const char *name;
};
/*
 * Per-socket-direction KTLS session state.  Reference-counted via
 * ktls_hold()/ktls_free(); freed through ktls_destroy().
 */
struct ktls_session {
/* Software callbacks: a session is either TX (encrypt) or RX (decrypt). */
union {
int (*sw_encrypt)(struct ktls_session *tls,
const struct tls_record_layer *hdr, uint8_t *trailer,
struct iovec *src, struct iovec *dst, int iovcnt,
uint64_t seqno, uint8_t record_type);
int (*sw_decrypt)(struct ktls_session *tls,
const struct tls_record_layer *hdr, struct mbuf *m,
uint64_t seqno, int *trailer_len);
};
/* Backend-private state: software cipher context or NIC send tag. */
union {
void *cipher;
struct m_snd_tag *snd_tag;
};
struct ktls_crypto_backend *be;
void (*free)(struct ktls_session *tls);	/* Backend cleanup hook. */
struct tls_session_params params;
u_int wq_index;		/* Per-CPU KTLS workqueue this session uses. */
volatile u_int refcount;
int mode;
/* Task to re-allocate the NIC send tag, e.g. after a route change —
 * NOTE(review): inferred from the name; confirm against ktls.c. */
struct task reset_tag_task;
struct inpcb *inp;
bool reset_pending;
} __aligned(CACHE_LINE_SIZE);
/*
 * Examine a socket buffer's queued encrypted data and, if a complete
 * TLS record is present, schedule it for decryption on a KTLS
 * workqueue (see the RX design notes at the top of this file).
 */
void ktls_check_rx(struct sockbuf *sb);
int ktls_crypto_backend_register(struct ktls_crypto_backend *be);
int ktls_crypto_backend_deregister(struct ktls_crypto_backend *be);
/* Enable KTLS on a socket for the receive or transmit direction. */
int ktls_enable_rx(struct socket *so, struct tls_enable *en);
int ktls_enable_tx(struct socket *so, struct tls_enable *en);
/* Tear down a session once its last reference is released. */
void ktls_destroy(struct ktls_session *tls);
void ktls_frame(struct mbuf *m, struct ktls_session *tls, int *enqueue_cnt,
uint8_t record_type);
void ktls_seq(struct sockbuf *sb, struct mbuf *m);
/* Queue not-yet-encrypted TX mbufs to a KTLS worker thread. */
void ktls_enqueue(struct mbuf *m, struct socket *so, int page_count);
void ktls_enqueue_to_free(struct mbuf *m);
int ktls_get_rx_mode(struct socket *so);
int ktls_set_tx_mode(struct socket *so, int mode);
int ktls_get_tx_mode(struct socket *so);
int ktls_output_eagain(struct inpcb *inp, struct ktls_session *tls);
/*
 * Acquire an additional reference on a TLS session.  NULL is accepted
 * for convenience; the session pointer is returned unchanged either way.
 */
static inline struct ktls_session *
ktls_hold(struct ktls_session *tls)
{

	if (tls == NULL)
		return (NULL);
	refcount_acquire(&tls->refcount);
	return (tls);
}
/*
 * Drop a reference on a TLS session; the session is destroyed when the
 * last reference is released.
 */
static inline void
ktls_free(struct ktls_session *tls)
{

	if (!refcount_release(&tls->refcount))
		return;
	ktls_destroy(tls);
}
#endif /* !_KERNEL */
#endif /* !_SYS_KTLS_H_ */