KTLS: Re-work unmapped mbufs to carry ext_pgs in the mbuf itself.

While the original implementation of unmapped mbufs was a large
step forward in terms of reducing cache misses by enabling mbufs
to carry more than a single page for sendfile, they are rather
cache unfriendly when accessing the ext_pgs metadata and
data. This is because the ext_pgs part of the mbuf is allocated
separately, and almost guaranteed to be cold in cache.

This change takes advantage of the fact that unmapped mbufs
are never used at the same time as pkthdr mbufs. Given this
fact, we can overlap the ext_pgs metadata with the mbuf
pkthdr, and carry the ext_pgs meta directly in the mbuf itself.
Similarly, we can carry the ext_pgs data (TLS hdr/trailer/array
of pages) directly after the existing m_ext.

In order to be able to carry 5 pages (which is the minimum
required for a 16K TLS record which is not perfectly aligned) on
LP64, I've had to steal ext_arg2. The only user of this in the
xmit path is sendfile, and I've adjusted it to use arg1 when
using unmapped mbufs.

This change is almost entirely mechanical, except that
mb_alloc_ext_pgs() no longer allows allocating pkthdrs, sendfile
avoids ext_arg2 as mentioned above, and the ext_pgs zone has been
removed.
This change saves roughly 2% "raw" CPU (~59% -> 57%), or over
3% "scaled" CPU on a Netflix 100% software kTLS workload at
90+ Gb/s on Broadwell Xeons.

In a follow-on commit, I plan to remove some hacks that avoid
accessing ext_pgs fields of mbufs, since those fields will now be
in cache.

Many thanks to glebius for helping to make this better in
the Netflix tree.

Reviewed by:	hselasky, jhb, rrs, glebius (early version)
Sponsored by:	Netflix
Differential Revision:	https://reviews.freebsd.org/D24213
This commit is contained in:
Andrew Gallatin 2020-04-14 14:46:06 +00:00
parent 51a16c8412
commit 23feb56348
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=359919
16 changed files with 244 additions and 238 deletions

View File

@ -905,8 +905,8 @@ ktls_tcp_payload_length(struct tlspcb *tlsp, struct mbuf *m_tls)
u_int plen, mlen;
MBUF_EXT_PGS_ASSERT(m_tls);
ext_pgs = m_tls->m_ext.ext_pgs;
hdr = (void *)ext_pgs->hdr;
ext_pgs = &m_tls->m_ext_pgs;
hdr = (void *)ext_pgs->m_epg_hdr;
plen = ntohs(hdr->tls_length);
/*
@ -961,8 +961,8 @@ ktls_payload_offset(struct tlspcb *tlsp, struct mbuf *m_tls)
#endif
MBUF_EXT_PGS_ASSERT(m_tls);
ext_pgs = m_tls->m_ext.ext_pgs;
hdr = (void *)ext_pgs->hdr;
ext_pgs = &m_tls->m_ext_pgs;
hdr = (void *)ext_pgs->m_epg_hdr;
plen = ntohs(hdr->tls_length);
#ifdef INVARIANTS
mlen = mtod(m_tls, vm_offset_t) + m_tls->m_len;
@ -1008,7 +1008,7 @@ ktls_wr_len(struct tlspcb *tlsp, struct mbuf *m, struct mbuf *m_tls,
u_int imm_len, offset, plen, wr_len, tlen;
MBUF_EXT_PGS_ASSERT(m_tls);
ext_pgs = m_tls->m_ext.ext_pgs;
ext_pgs = &m_tls->m_ext_pgs;
/*
* Determine the size of the TLS record payload to send
@ -1040,7 +1040,7 @@ ktls_wr_len(struct tlspcb *tlsp, struct mbuf *m, struct mbuf *m_tls,
return (wr_len);
}
hdr = (void *)ext_pgs->hdr;
hdr = (void *)ext_pgs->m_epg_hdr;
plen = TLS_HEADER_LENGTH + ntohs(hdr->tls_length) - ext_pgs->trail_len;
if (tlen < plen) {
plen = tlen;
@ -1474,7 +1474,7 @@ ktls_write_tunnel_packet(struct sge_txq *txq, void *dst, struct mbuf *m,
/* Locate the template TLS header. */
MBUF_EXT_PGS_ASSERT(m_tls);
ext_pgs = m_tls->m_ext.ext_pgs;
ext_pgs = &m_tls->m_ext_pgs;
/* This should always be the last TLS record in a chain. */
MPASS(m_tls->m_next == NULL);
@ -1543,8 +1543,8 @@ ktls_write_tunnel_packet(struct sge_txq *txq, void *dst, struct mbuf *m,
(m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen + sizeof(*tcp)));
/* Copy the subset of the TLS header requested. */
copy_to_txd(&txq->eq, (char *)ext_pgs->hdr + mtod(m_tls, vm_offset_t),
&out, m_tls->m_len);
copy_to_txd(&txq->eq, (char *)ext_pgs->m_epg_hdr +
mtod(m_tls, vm_offset_t), &out, m_tls->m_len);
txq->imm_wrs++;
txq->txpkt_wrs++;
@ -1603,8 +1603,8 @@ ktls_write_tls_wr(struct tlspcb *tlsp, struct sge_txq *txq,
/* Locate the TLS header. */
MBUF_EXT_PGS_ASSERT(m_tls);
ext_pgs = m_tls->m_ext.ext_pgs;
hdr = (void *)ext_pgs->hdr;
ext_pgs = &m_tls->m_ext_pgs;
hdr = (void *)ext_pgs->m_epg_hdr;
plen = TLS_HEADER_LENGTH + ntohs(hdr->tls_length) - ext_pgs->trail_len;
/* Determine how much of the TLS record to send. */
@ -2031,7 +2031,7 @@ ktls_write_tls_wr(struct tlspcb *tlsp, struct sge_txq *txq,
/* Populate the TLS header */
out = (void *)(tx_data + 1);
if (offset == 0) {
memcpy(out, ext_pgs->hdr, ext_pgs->hdr_len);
memcpy(out, ext_pgs->m_epg_hdr, ext_pgs->hdr_len);
out += ext_pgs->hdr_len;
}

View File

@ -2419,7 +2419,7 @@ count_mbuf_ext_pgs(struct mbuf *m, int skip, vm_paddr_t *nextaddr)
int nsegs = 0;
MBUF_EXT_PGS_ASSERT(m);
ext_pgs = m->m_ext.ext_pgs;
ext_pgs = &m->m_ext_pgs;
off = mtod(m, vm_offset_t);
len = m->m_len;
off += skip;
@ -2435,7 +2435,7 @@ count_mbuf_ext_pgs(struct mbuf *m, int skip, vm_paddr_t *nextaddr)
off = 0;
len -= seglen;
paddr = pmap_kextract(
(vm_offset_t)&ext_pgs->hdr[segoff]);
(vm_offset_t)&ext_pgs->m_epg_hdr[segoff]);
if (*nextaddr != paddr)
nsegs++;
*nextaddr = paddr + seglen;
@ -2454,7 +2454,7 @@ count_mbuf_ext_pgs(struct mbuf *m, int skip, vm_paddr_t *nextaddr)
off = 0;
seglen = min(seglen, len);
len -= seglen;
paddr = ext_pgs->pa[i] + segoff;
paddr = ext_pgs->m_epg_pa[i] + segoff;
if (*nextaddr != paddr)
nsegs++;
*nextaddr = paddr + seglen;
@ -2463,7 +2463,7 @@ count_mbuf_ext_pgs(struct mbuf *m, int skip, vm_paddr_t *nextaddr)
if (len != 0) {
seglen = min(len, ext_pgs->trail_len - off);
len -= seglen;
paddr = pmap_kextract((vm_offset_t)&ext_pgs->trail[off]);
paddr = pmap_kextract((vm_offset_t)&ext_pgs->m_epg_trail[off]);
if (*nextaddr != paddr)
nsegs++;
*nextaddr = paddr + seglen;

View File

@ -732,7 +732,7 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
if (m->m_flags & M_NOMAP) {
#ifdef KERN_TLS
if (m->m_ext.ext_pgs->tls != NULL) {
if (m->m_ext_pgs.tls != NULL) {
toep->flags |= TPF_KTLS;
if (plen == 0) {
SOCKBUF_UNLOCK(sb);
@ -1927,7 +1927,7 @@ aiotx_free_pgs(struct mbuf *m)
vm_page_t pg;
MBUF_EXT_PGS_ASSERT(m);
ext_pgs = m->m_ext.ext_pgs;
ext_pgs = &m->m_ext_pgs;
job = m->m_ext.ext_arg1;
#ifdef VERBOSE_TRACES
CTR3(KTR_CXGBE, "%s: completed %d bytes for tid %d", __func__,
@ -1935,7 +1935,7 @@ aiotx_free_pgs(struct mbuf *m)
#endif
for (int i = 0; i < ext_pgs->npgs; i++) {
pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]);
pg = PHYS_TO_VM_PAGE(ext_pgs->m_epg_pa[i]);
vm_page_unwire(pg, PQ_ACTIVE);
}
@ -1984,13 +1984,13 @@ alloc_aiotx_mbuf(struct kaiocb *job, int len)
if (npages < 0)
break;
m = mb_alloc_ext_pgs(M_WAITOK, false, aiotx_free_pgs);
m = mb_alloc_ext_pgs(M_WAITOK, aiotx_free_pgs);
if (m == NULL) {
vm_page_unhold_pages(pgs, npages);
break;
}
ext_pgs = m->m_ext.ext_pgs;
ext_pgs = &m->m_ext_pgs;
ext_pgs->first_pg_off = pgoff;
ext_pgs->npgs = npages;
if (npages == 1) {
@ -2003,7 +2003,7 @@ alloc_aiotx_mbuf(struct kaiocb *job, int len)
(npages - 2) * PAGE_SIZE;
}
for (i = 0; i < npages; i++)
ext_pgs->pa[i] = VM_PAGE_TO_PHYS(pgs[i]);
ext_pgs->m_epg_pa[i] = VM_PAGE_TO_PHYS(pgs[i]);
m->m_len = mlen;
m->m_ext.ext_size = npages * PAGE_SIZE;

View File

@ -1568,24 +1568,26 @@ t4_push_tls_records(struct adapter *sc, struct toepcb *toep, int drop)
#ifdef KERN_TLS
static int
count_ext_pgs_segs(struct mbuf_ext_pgs *ext_pgs)
count_ext_pgs_segs(struct mbuf_ext_pgs *ext_pgs,
struct mbuf_ext_pgs_data *ext_pgs_data)
{
vm_paddr_t nextpa;
u_int i, nsegs;
MPASS(ext_pgs->npgs > 0);
nsegs = 1;
nextpa = ext_pgs->pa[0] + PAGE_SIZE;
nextpa = ext_pgs_data->pa[0] + PAGE_SIZE;
for (i = 1; i < ext_pgs->npgs; i++) {
if (nextpa != ext_pgs->pa[i])
if (nextpa != ext_pgs_data->pa[i])
nsegs++;
nextpa = ext_pgs->pa[i] + PAGE_SIZE;
nextpa = ext_pgs_data->pa[i] + PAGE_SIZE;
}
return (nsegs);
}
static void
write_ktlstx_sgl(void *dst, struct mbuf_ext_pgs *ext_pgs, int nsegs)
write_ktlstx_sgl(void *dst, struct mbuf_ext_pgs *ext_pgs,
struct mbuf_ext_pgs_data *ext_pgs_data, int nsegs)
{
struct ulptx_sgl *usgl = dst;
vm_paddr_t pa;
@ -1598,12 +1600,12 @@ write_ktlstx_sgl(void *dst, struct mbuf_ext_pgs *ext_pgs, int nsegs)
V_ULPTX_NSGE(nsegs));
/* Figure out the first S/G length. */
pa = ext_pgs->pa[0] + ext_pgs->first_pg_off;
pa = ext_pgs_data->pa[0] + ext_pgs->first_pg_off;
usgl->addr0 = htobe64(pa);
len = mbuf_ext_pg_len(ext_pgs, 0, ext_pgs->first_pg_off);
pa += len;
for (i = 1; i < ext_pgs->npgs; i++) {
if (ext_pgs->pa[i] != pa)
if (ext_pgs_data->pa[i] != pa)
break;
len += mbuf_ext_pg_len(ext_pgs, i, 0);
pa += mbuf_ext_pg_len(ext_pgs, i, 0);
@ -1615,14 +1617,14 @@ write_ktlstx_sgl(void *dst, struct mbuf_ext_pgs *ext_pgs, int nsegs)
j = -1;
for (; i < ext_pgs->npgs; i++) {
if (j == -1 || ext_pgs->pa[i] != pa) {
if (j == -1 || ext_pgs_data->pa[i] != pa) {
if (j >= 0)
usgl->sge[j / 2].len[j & 1] = htobe32(len);
j++;
#ifdef INVARIANTS
nsegs--;
#endif
pa = ext_pgs->pa[i];
pa = ext_pgs_data->pa[i];
usgl->sge[j / 2].addr[j & 1] = htobe64(pa);
len = mbuf_ext_pg_len(ext_pgs, i, 0);
pa += len;
@ -1744,7 +1746,7 @@ t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop)
KASSERT(m->m_flags & M_NOMAP, ("%s: mbuf %p is not NOMAP",
__func__, m));
KASSERT(m->m_ext.ext_pgs->tls != NULL,
KASSERT(m->m_ext_pgs.tls != NULL,
("%s: mbuf %p doesn't have TLS session", __func__, m));
/* Calculate WR length. */
@ -1756,7 +1758,8 @@ t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop)
wr_len += AES_BLOCK_LEN;
/* Account for SGL in work request length. */
nsegs = count_ext_pgs_segs(m->m_ext.ext_pgs);
nsegs = count_ext_pgs_segs(&m->m_ext_pgs,
&m->m_ext.ext_pgs);
wr_len += sizeof(struct ulptx_sgl) +
((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8;
@ -1810,22 +1813,22 @@ t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop)
return;
}
thdr = (struct tls_hdr *)m->m_ext.ext_pgs->hdr;
thdr = (struct tls_hdr *)&m->m_epg_hdr;
#ifdef VERBOSE_TRACES
CTR5(KTR_CXGBE, "%s: tid %d TLS record %ju type %d len %#x",
__func__, toep->tid, m->m_ext.ext_pgs->seqno, thdr->type,
__func__, toep->tid, m->m_ext_pgs.seqno, thdr->type,
m->m_len);
#endif
txwr = wrtod(wr);
cpl = (struct cpl_tx_tls_sfo *)(txwr + 1);
memset(txwr, 0, roundup2(wr_len, 16));
credits = howmany(wr_len, 16);
expn_size = m->m_ext.ext_pgs->hdr_len +
m->m_ext.ext_pgs->trail_len;
expn_size = m->m_ext_pgs.hdr_len +
m->m_ext_pgs.trail_len;
tls_size = m->m_len - expn_size;
write_tlstx_wr(txwr, toep, 0,
tls_size, expn_size, 1, credits, shove, 1);
toep->tls.tx_seq_no = m->m_ext.ext_pgs->seqno;
toep->tls.tx_seq_no = m->m_ext_pgs.seqno;
write_tlstx_cpl(cpl, toep, thdr, tls_size, 1);
tls_copy_tx_key(toep, cpl + 1);
@ -1834,7 +1837,8 @@ t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop)
memcpy(buf, thdr + 1, toep->tls.iv_len);
buf += AES_BLOCK_LEN;
write_ktlstx_sgl(buf, m->m_ext.ext_pgs, nsegs);
write_ktlstx_sgl(buf, &m->m_ext_pgs, &m->m_ext.ext_pgs,
nsegs);
KASSERT(toep->tx_credits >= credits,
("%s: not enough credits", __func__));

View File

@ -618,15 +618,14 @@ mlx5e_tls_send_nop(struct mlx5e_sq *sq, struct mlx5e_tls_tag *ptag)
static struct mbuf *
sbtls_recover_record(struct mbuf *mb, int wait, uint32_t tcp_old, uint32_t *ptcp_seq)
{
struct mbuf *mr;
struct mbuf *mr, *top;
uint32_t offset;
uint32_t delta;
/* check format of incoming mbuf */
if (mb->m_next == NULL ||
(mb->m_next->m_flags & (M_NOMAP | M_EXT)) != (M_NOMAP | M_EXT) ||
mb->m_next->m_ext.ext_buf == NULL) {
mr = NULL;
(mb->m_next->m_flags & (M_NOMAP | M_EXT)) != (M_NOMAP | M_EXT)) {
top = NULL;
goto done;
}
@ -635,22 +634,31 @@ sbtls_recover_record(struct mbuf *mb, int wait, uint32_t tcp_old, uint32_t *ptcp
/* check if we don't need to re-transmit anything */
if (offset == 0) {
mr = SBTLS_MBUF_NO_DATA;
top = SBTLS_MBUF_NO_DATA;
goto done;
}
/* try to get a new mbufs with packet header */
mr = m_gethdr(wait, MT_DATA);
if (mr == NULL)
/* try to get a new packet header */
top = m_gethdr(wait, MT_DATA);
if (top == NULL)
goto done;
mr = m_get(wait, MT_DATA);
if (mr == NULL) {
m_free(top);
top = NULL;
goto done;
}
top->m_next = mr;
mb_dupcl(mr, mb->m_next);
/* the beginning of the TLS record */
mr->m_data = NULL;
/* setup packet header length */
mr->m_pkthdr.len = mr->m_len = offset;
top->m_pkthdr.len = mr->m_len = offset;
/* check for partial re-transmit */
delta = *ptcp_seq - tcp_old;
@ -666,7 +674,7 @@ sbtls_recover_record(struct mbuf *mb, int wait, uint32_t tcp_old, uint32_t *ptcp
*/
*ptcp_seq -= offset;
done:
return (mr);
return (top);
}
static int
@ -677,7 +685,7 @@ mlx5e_sq_tls_populate(struct mbuf *mb, uint64_t *pseq)
for (; mb != NULL; mb = mb->m_next) {
if (!(mb->m_flags & M_NOMAP))
continue;
ext_pgs = (void *)mb->m_ext.ext_buf;
ext_pgs = &mb->m_ext_pgs;
*pseq = ext_pgs->seqno;
return (1);
}

View File

@ -295,7 +295,6 @@ uma_zone_t zone_pack;
uma_zone_t zone_jumbop;
uma_zone_t zone_jumbo9;
uma_zone_t zone_jumbo16;
uma_zone_t zone_extpgs;
/*
* Local prototypes.
@ -312,9 +311,11 @@ static void mb_reclaim(uma_zone_t, int);
/* Ensure that MSIZE is a power of 2. */
CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);
_Static_assert(sizeof(struct mbuf_ext_pgs) == 256,
"mbuf_ext_pgs size mismatch");
_Static_assert(offsetof(struct mbuf, m_ext) ==
offsetof(struct mbuf, m_ext_pgs.m_ext),
"m_ext offset mismatch between mbuf and ext_pgs");
_Static_assert(sizeof(struct mbuf) <= MSIZE,
"size of mbuf exceeds MSIZE");
/*
* Initialize FreeBSD Network buffer allocation.
*/
@ -369,11 +370,6 @@ mbuf_init(void *dummy)
uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached");
uma_zone_set_maxaction(zone_jumbo16, mb_reclaim);
zone_extpgs = uma_zcreate(MBUF_EXTPGS_MEM_NAME,
sizeof(struct mbuf_ext_pgs),
NULL, NULL, NULL, NULL,
UMA_ALIGN_CACHE, 0);
/*
* Hook event handler for low-memory situation, used to
* drain protocols and push data back to the caches (UMA
@ -840,8 +836,8 @@ mb_free_notready(struct mbuf *m, int count)
for (i = 0; i < count && m != NULL; i++) {
if ((m->m_flags & M_EXT) != 0 &&
m->m_ext.ext_type == EXT_PGS) {
m->m_ext.ext_pgs->nrdy--;
if (m->m_ext.ext_pgs->nrdy != 0)
m->m_ext_pgs.nrdy--;
if (m->m_ext_pgs.nrdy != 0)
continue;
}
m = m_free(m);
@ -883,27 +879,22 @@ mb_unmapped_compress(struct mbuf *m)
if (*refcnt != 1)
return (EBUSY);
/*
* Copy mbuf header and m_ext portion of 'm' to 'm_temp' to
* create a "fake" EXT_PGS mbuf that can be used with
* m_copydata() as well as the ext_free callback.
*/
memcpy(&m_temp, m, offsetof(struct mbuf, m_ext) + sizeof (m->m_ext));
m_temp.m_next = NULL;
m_temp.m_nextpkt = NULL;
m_init(&m_temp, M_NOWAIT, MT_DATA, 0);
/* copy data out of old mbuf */
m_copydata(m, 0, m->m_len, mtod(&m_temp, char *));
m_temp.m_len = m->m_len;
/* Free the backing pages. */
m->m_ext.ext_free(m);
/* Turn 'm' into a "normal" mbuf. */
m->m_flags &= ~(M_EXT | M_RDONLY | M_NOMAP);
m->m_data = m->m_dat;
/* Copy data from template's ext_pgs. */
m_copydata(&m_temp, 0, m_temp.m_len, mtod(m, caddr_t));
/* copy data back into m */
m_copydata(&m_temp, 0, m_temp.m_len, mtod(m, char *));
/* Free the backing pages. */
m_temp.m_ext.ext_free(&m_temp);
/* Finally, free the ext_pgs struct. */
uma_zfree(zone_extpgs, m_temp.m_ext.ext_pgs);
return (0);
}
@ -959,7 +950,7 @@ _mb_unmapped_to_ext(struct mbuf *m)
u_int ref_inc = 0;
MBUF_EXT_PGS_ASSERT(m);
ext_pgs = m->m_ext.ext_pgs;
ext_pgs = &m->m_ext_pgs;
len = m->m_len;
KASSERT(ext_pgs->tls == NULL, ("%s: can't convert TLS mbuf %p",
__func__, m));
@ -993,7 +984,7 @@ _mb_unmapped_to_ext(struct mbuf *m)
goto fail;
m_new->m_len = seglen;
prev = top = m_new;
memcpy(mtod(m_new, void *), &ext_pgs->hdr[segoff],
memcpy(mtod(m_new, void *), &ext_pgs->m_epg_hdr[segoff],
seglen);
}
}
@ -1011,7 +1002,7 @@ _mb_unmapped_to_ext(struct mbuf *m)
seglen = min(seglen, len);
len -= seglen;
pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]);
pg = PHYS_TO_VM_PAGE(ext_pgs->m_epg_pa[i]);
m_new = m_get(M_NOWAIT, MT_DATA);
if (m_new == NULL)
goto fail;
@ -1045,7 +1036,7 @@ _mb_unmapped_to_ext(struct mbuf *m)
else
prev->m_next = m_new;
m_new->m_len = len;
memcpy(mtod(m_new, void *), &ext_pgs->trail[off], len);
memcpy(mtod(m_new, void *), &ext_pgs->m_epg_trail[off], len);
}
if (ref_inc != 0) {
@ -1132,23 +1123,16 @@ mb_unmapped_to_ext(struct mbuf *top)
* freed.
*/
struct mbuf *
mb_alloc_ext_pgs(int how, bool pkthdr, m_ext_free_t ext_free)
mb_alloc_ext_pgs(int how, m_ext_free_t ext_free)
{
struct mbuf *m;
struct mbuf_ext_pgs *ext_pgs;
if (pkthdr)
m = m_gethdr(how, MT_DATA);
else
m = m_get(how, MT_DATA);
m = m_get(how, MT_DATA);
if (m == NULL)
return (NULL);
ext_pgs = uma_zalloc(zone_extpgs, how);
if (ext_pgs == NULL) {
m_free(m);
return (NULL);
}
ext_pgs = &m->m_ext_pgs;
ext_pgs->npgs = 0;
ext_pgs->nrdy = 0;
ext_pgs->first_pg_off = 0;
@ -1163,7 +1147,6 @@ mb_alloc_ext_pgs(int how, bool pkthdr, m_ext_free_t ext_free)
m->m_ext.ext_type = EXT_PGS;
m->m_ext.ext_flags = EXT_FLAG_EMBREF;
m->m_ext.ext_count = 1;
m->m_ext.ext_pgs = ext_pgs;
m->m_ext.ext_size = 0;
m->m_ext.ext_free = ext_free;
return (m);
@ -1180,7 +1163,7 @@ mb_ext_pgs_check(struct mbuf_ext_pgs *ext_pgs)
*/
KASSERT(ext_pgs->npgs > 0,
("ext_pgs with no valid pages: %p", ext_pgs));
KASSERT(ext_pgs->npgs <= nitems(ext_pgs->pa),
KASSERT(ext_pgs->npgs <= nitems(ext_pgs->m_epg_pa),
("ext_pgs with too many pages: %p", ext_pgs));
KASSERT(ext_pgs->nrdy <= ext_pgs->npgs,
("ext_pgs with too many ready pages: %p", ext_pgs));
@ -1195,9 +1178,9 @@ mb_ext_pgs_check(struct mbuf_ext_pgs *ext_pgs)
PAGE_SIZE, ("ext_pgs with single page too large: %p",
ext_pgs));
}
KASSERT(ext_pgs->hdr_len <= sizeof(ext_pgs->hdr),
KASSERT(ext_pgs->hdr_len <= sizeof(ext_pgs->m_epg_hdr),
("ext_pgs with too large header length: %p", ext_pgs));
KASSERT(ext_pgs->trail_len <= sizeof(ext_pgs->trail),
KASSERT(ext_pgs->trail_len <= sizeof(ext_pgs->m_epg_trail),
("ext_pgs with too large header length: %p", ext_pgs));
}
#endif
@ -1277,15 +1260,14 @@ mb_free_ext(struct mbuf *m)
("%s: ext_free not set", __func__));
mref->m_ext.ext_free(mref);
#ifdef KERN_TLS
pgs = mref->m_ext.ext_pgs;
pgs = &mref->m_ext_pgs;
tls = pgs->tls;
if (tls != NULL &&
!refcount_release_if_not_last(&tls->refcount))
ktls_enqueue_to_free(pgs);
else
#endif
uma_zfree(zone_extpgs, mref->m_ext.ext_pgs);
uma_zfree(zone_mbuf, mref);
uma_zfree(zone_mbuf, mref);
break;
}
case EXT_SFBUF:

View File

@ -174,18 +174,18 @@ sendfile_free_mext_pg(struct mbuf *m)
("%s: m %p !M_EXT or !EXT_PGS", __func__, m));
cache_last = m->m_ext.ext_flags & EXT_FLAG_CACHE_LAST;
ext_pgs = m->m_ext.ext_pgs;
ext_pgs = &m->m_ext_pgs;
flags = (m->m_ext.ext_flags & EXT_FLAG_NOCACHE) != 0 ? VPR_TRYFREE : 0;
for (i = 0; i < ext_pgs->npgs; i++) {
if (cache_last && i == ext_pgs->npgs - 1)
flags = 0;
pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]);
pg = PHYS_TO_VM_PAGE(ext_pgs->m_epg_pa[i]);
vm_page_release(pg, flags);
}
if (m->m_ext.ext_flags & EXT_FLAG_SYNC) {
struct sendfile_sync *sfs = m->m_ext.ext_arg2;
struct sendfile_sync *sfs = m->m_ext.ext_arg1;
mtx_lock(&sfs->mtx);
KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0"));
@ -329,7 +329,7 @@ sendfile_iodone(void *arg, vm_page_t *pa, int count, int error)
#if defined(KERN_TLS) && defined(INVARIANTS)
if ((sfio->m->m_flags & M_EXT) != 0 &&
sfio->m->m_ext.ext_type == EXT_PGS)
KASSERT(sfio->tls == sfio->m->m_ext.ext_pgs->tls,
KASSERT(sfio->tls == sfio->m->m_ext_pgs.tls,
("TLS session mismatch"));
else
KASSERT(sfio->tls == NULL,
@ -958,7 +958,7 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
ext_pgs_idx++;
if (ext_pgs_idx == max_pgs) {
m0 = mb_alloc_ext_pgs(M_WAITOK, false,
m0 = mb_alloc_ext_pgs(M_WAITOK,
sendfile_free_mext_pg);
if (flags & SF_NOCACHE) {
@ -979,12 +979,18 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
if (sfs != NULL) {
m0->m_ext.ext_flags |=
EXT_FLAG_SYNC;
m0->m_ext.ext_arg2 = sfs;
if (m0->m_ext.ext_type ==
EXT_PGS)
m0->m_ext.ext_arg1 =
sfs;
else
m0->m_ext.ext_arg2 =
sfs;
mtx_lock(&sfs->mtx);
sfs->count++;
mtx_unlock(&sfs->mtx);
}
ext_pgs = m0->m_ext.ext_pgs;
ext_pgs = &m0->m_ext_pgs;
ext_pgs_idx = 0;
/* Append to mbuf chain. */
@ -1001,7 +1007,7 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
ext_pgs->nrdy++;
}
ext_pgs->pa[ext_pgs_idx] = VM_PAGE_TO_PHYS(pga);
ext_pgs->m_epg_pa[ext_pgs_idx] = VM_PAGE_TO_PHYS(pga);
ext_pgs->npgs++;
xfs = xfsize(i, npages, off, space);
ext_pgs->last_pg_len = xfs;
@ -1055,6 +1061,10 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
m0->m_ext.ext_flags |= EXT_FLAG_NOCACHE;
if (sfs != NULL) {
m0->m_ext.ext_flags |= EXT_FLAG_SYNC;
if (m0->m_ext.ext_type == EXT_PGS)
m0->m_ext.ext_arg1 = sfs;
else
m0->m_ext.ext_arg2 = sfs;
m0->m_ext.ext_arg2 = sfs;
mtx_lock(&sfs->mtx);
sfs->count++;

View File

@ -123,7 +123,7 @@ _bus_dmamap_load_unmapped_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map,
int error, i, off, len, pglen, pgoff, seglen, segoff;
MBUF_EXT_PGS_ASSERT(m);
ext_pgs = m->m_ext.ext_pgs;
ext_pgs = &m->m_ext_pgs;
len = m->m_len;
error = 0;
@ -141,7 +141,7 @@ _bus_dmamap_load_unmapped_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map,
off = 0;
len -= seglen;
error = _bus_dmamap_load_buffer(dmat, map,
&ext_pgs->hdr[segoff], seglen, kernel_pmap,
&ext_pgs->m_epg_hdr[segoff], seglen, kernel_pmap,
flags, segs, nsegs);
}
}
@ -159,7 +159,7 @@ _bus_dmamap_load_unmapped_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map,
seglen = min(seglen, len);
len -= seglen;
error = _bus_dmamap_load_phys(dmat, map,
ext_pgs->pa[i] + segoff, seglen, flags, segs, nsegs);
ext_pgs->m_epg_pa[i] + segoff, seglen, flags, segs, nsegs);
pgoff = 0;
};
if (len != 0 && error == 0) {
@ -167,7 +167,7 @@ _bus_dmamap_load_unmapped_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map,
("off + len > trail (%d + %d > %d)", off, len,
ext_pgs->trail_len));
error = _bus_dmamap_load_buffer(dmat, map,
&ext_pgs->trail[off], len, kernel_pmap, flags, segs,
&ext_pgs->m_epg_trail[off], len, kernel_pmap, flags, segs,
nsegs);
}
return (error);

View File

@ -242,7 +242,8 @@ sglist_count_ext_pgs(struct mbuf_ext_pgs *ext_pgs, size_t off, size_t len)
seglen = MIN(seglen, len);
off = 0;
len -= seglen;
nsegs += sglist_count(&ext_pgs->hdr[segoff], seglen);
nsegs += sglist_count(&ext_pgs->m_epg_hdr[segoff],
seglen);
}
}
nextaddr = 0;
@ -259,7 +260,7 @@ sglist_count_ext_pgs(struct mbuf_ext_pgs *ext_pgs, size_t off, size_t len)
off = 0;
seglen = MIN(seglen, len);
len -= seglen;
paddr = ext_pgs->pa[i] + segoff;
paddr = ext_pgs->m_epg_pa[i] + segoff;
if (paddr != nextaddr)
nsegs++;
nextaddr = paddr + seglen;
@ -268,7 +269,7 @@ sglist_count_ext_pgs(struct mbuf_ext_pgs *ext_pgs, size_t off, size_t len)
if (len != 0) {
seglen = MIN(len, ext_pgs->trail_len - off);
len -= seglen;
nsegs += sglist_count(&ext_pgs->trail[off], seglen);
nsegs += sglist_count(&ext_pgs->m_epg_trail[off], seglen);
}
KASSERT(len == 0, ("len != 0"));
return (nsegs);
@ -283,7 +284,7 @@ sglist_count_mb_ext_pgs(struct mbuf *m)
{
MBUF_EXT_PGS_ASSERT(m);
return (sglist_count_ext_pgs(m->m_ext.ext_pgs, mtod(m, vm_offset_t),
return (sglist_count_ext_pgs(&m->m_ext_pgs, mtod(m, vm_offset_t),
m->m_len));
}
@ -412,7 +413,7 @@ sglist_append_ext_pgs(struct sglist *sg, struct mbuf_ext_pgs *ext_pgs,
off = 0;
len -= seglen;
error = sglist_append(sg,
&ext_pgs->hdr[segoff], seglen);
&ext_pgs->m_epg_hdr[segoff], seglen);
}
}
pgoff = ext_pgs->first_pg_off;
@ -428,7 +429,7 @@ sglist_append_ext_pgs(struct sglist *sg, struct mbuf_ext_pgs *ext_pgs,
off = 0;
seglen = MIN(seglen, len);
len -= seglen;
paddr = ext_pgs->pa[i] + segoff;
paddr = ext_pgs->m_epg_pa[i] + segoff;
error = sglist_append_phys(sg, paddr, seglen);
pgoff = 0;
};
@ -436,7 +437,7 @@ sglist_append_ext_pgs(struct sglist *sg, struct mbuf_ext_pgs *ext_pgs,
seglen = MIN(len, ext_pgs->trail_len - off);
len -= seglen;
error = sglist_append(sg,
&ext_pgs->trail[off], seglen);
&ext_pgs->m_epg_trail[off], seglen);
}
if (error == 0)
KASSERT(len == 0, ("len != 0"));
@ -454,7 +455,7 @@ sglist_append_mb_ext_pgs(struct sglist *sg, struct mbuf *m)
/* for now, all unmapped mbufs are assumed to be EXT_PGS */
MBUF_EXT_PGS_ASSERT(m);
return (sglist_append_ext_pgs(sg, m->m_ext.ext_pgs,
return (sglist_append_ext_pgs(sg, &m->m_ext_pgs,
mtod(m, vm_offset_t), m->m_len));
}

View File

@ -1216,7 +1216,7 @@ ktls_seq(struct sockbuf *sb, struct mbuf *m)
KASSERT((m->m_flags & M_NOMAP) != 0,
("ktls_seq: mapped mbuf %p", m));
pgs = m->m_ext.ext_pgs;
pgs = &m->m_ext_pgs;
pgs->seqno = sb->sb_tls_seqno;
sb->sb_tls_seqno++;
}
@ -1264,7 +1264,7 @@ ktls_frame(struct mbuf *top, struct ktls_session *tls, int *enq_cnt,
("ktls_frame: mapped mbuf %p (top = %p)\n", m, top));
tls_len = m->m_len;
pgs = m->m_ext.ext_pgs;
pgs = &m->m_ext_pgs;
/* Save a reference to the session. */
pgs->tls = ktls_hold(tls);
@ -1297,7 +1297,7 @@ ktls_frame(struct mbuf *top, struct ktls_session *tls, int *enq_cnt,
m->m_len += pgs->hdr_len + pgs->trail_len;
/* Populate the TLS header. */
tlshdr = (void *)pgs->hdr;
tlshdr = (void *)pgs->m_epg_hdr;
tlshdr->tls_vmajor = tls->params.tls_vmajor;
/*
@ -1310,6 +1310,7 @@ ktls_frame(struct mbuf *top, struct ktls_session *tls, int *enq_cnt,
tlshdr->tls_type = TLS_RLTYPE_APP;
/* save the real record type for later */
pgs->record_type = record_type;
pgs->m_epg_trail[0] = record_type;
} else {
tlshdr->tls_vminor = tls->params.tls_vminor;
tlshdr->tls_type = record_type;
@ -1380,7 +1381,7 @@ ktls_enqueue(struct mbuf *m, struct socket *so, int page_count)
("ktls_enqueue: %p not unready & nomap mbuf\n", m));
KASSERT(page_count != 0, ("enqueueing TLS mbuf with zero page count"));
pgs = m->m_ext.ext_pgs;
pgs = &m->m_ext_pgs;
KASSERT(pgs->tls->mode == TCP_TLS_MODE_SW, ("!SW TLS mbuf"));
@ -1447,7 +1448,7 @@ ktls_encrypt(struct mbuf_ext_pgs *pgs)
*/
error = 0;
for (m = top; npages != total_pages; m = m->m_next) {
pgs = m->m_ext.ext_pgs;
pgs = &m->m_ext_pgs;
KASSERT(pgs->tls == tls,
("different TLS sessions in a single mbuf chain: %p vs %p",
@ -1474,7 +1475,8 @@ ktls_encrypt(struct mbuf_ext_pgs *pgs)
len = mbuf_ext_pg_len(pgs, i, off);
src_iov[i].iov_len = len;
src_iov[i].iov_base =
(char *)(void *)PHYS_TO_DMAP(pgs->pa[i]) + off;
(char *)(void *)PHYS_TO_DMAP(pgs->m_epg_pa[i]) +
off;
if (is_anon) {
dst_iov[i].iov_base = src_iov[i].iov_base;
@ -1497,8 +1499,8 @@ ktls_encrypt(struct mbuf_ext_pgs *pgs)
npages += i;
error = (*tls->sw_encrypt)(tls,
(const struct tls_record_layer *)pgs->hdr,
pgs->trail, src_iov, dst_iov, i, pgs->seqno,
(const struct tls_record_layer *)pgs->m_epg_hdr,
pgs->m_epg_trail, src_iov, dst_iov, i, pgs->seqno,
pgs->record_type);
if (error) {
counter_u64_add(ktls_offload_failed_crypto, 1);
@ -1516,7 +1518,7 @@ ktls_encrypt(struct mbuf_ext_pgs *pgs)
/* Replace them with the new pages. */
for (i = 0; i < pgs->npgs; i++)
pgs->pa[i] = parray[i];
pgs->m_epg_pa[i] = parray[i];
/* Use the basic free routine. */
m->m_ext.ext_free = mb_free_mext_pgs;
@ -1556,6 +1558,7 @@ ktls_work_thread(void *ctx)
struct ktls_wq *wq = ctx;
struct mbuf_ext_pgs *p, *n;
struct ktls_session *tls;
struct mbuf *m;
STAILQ_HEAD(, mbuf_ext_pgs) local_head;
#if defined(__aarch64__) || defined(__amd64__) || defined(__i386__)
@ -1580,7 +1583,8 @@ ktls_work_thread(void *ctx)
} else {
tls = p->tls;
ktls_free(tls);
uma_zfree(zone_extpgs, p);
m = __containerof(p, struct mbuf, m_ext_pgs);
uma_zfree(zone_mbuf, m);
}
}
}

View File

@ -163,11 +163,11 @@ CTASSERT(offsetof(struct mbuf, m_pktdat) % 8 == 0);
#if defined(__LP64__)
CTASSERT(offsetof(struct mbuf, m_dat) == 32);
CTASSERT(sizeof(struct pkthdr) == 56);
CTASSERT(sizeof(struct m_ext) == 48);
CTASSERT(sizeof(struct m_ext) == 168);
#else
CTASSERT(offsetof(struct mbuf, m_dat) == 24);
CTASSERT(sizeof(struct pkthdr) == 48);
CTASSERT(sizeof(struct m_ext) == 28);
CTASSERT(sizeof(struct m_ext) == 184);
#endif
/*
@ -203,6 +203,9 @@ mb_dupcl(struct mbuf *n, struct mbuf *m)
*/
if (m->m_ext.ext_type == EXT_EXTREF)
bcopy(&m->m_ext, &n->m_ext, sizeof(struct m_ext));
else if (m->m_ext.ext_type == EXT_PGS)
bcopy(&m->m_ext_pgs, &n->m_ext_pgs,
sizeof(struct mbuf_ext_pgs));
else
bcopy(&m->m_ext, &n->m_ext, m_ext_copylen);
n->m_flags |= M_EXT;
@ -1426,7 +1429,7 @@ frags_per_mbuf(struct mbuf *m)
* XXX: This overestimates the number of fragments by assuming
* all the backing physical pages are disjoint.
*/
ext_pgs = m->m_ext.ext_pgs;
ext_pgs = &m->m_ext_pgs;
frags = 0;
if (ext_pgs->hdr_len != 0)
frags++;
@ -1618,9 +1621,9 @@ mb_free_mext_pgs(struct mbuf *m)
vm_page_t pg;
MBUF_EXT_PGS_ASSERT(m);
ext_pgs = m->m_ext.ext_pgs;
ext_pgs = &m->m_ext_pgs;
for (int i = 0; i < ext_pgs->npgs; i++) {
pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]);
pg = PHYS_TO_VM_PAGE(ext_pgs->m_epg_pa[i]);
vm_page_unwire_noq(pg);
vm_page_free(pg);
}
@ -1653,9 +1656,9 @@ m_uiotombuf_nomap(struct uio *uio, int how, int len, int maxseg, int flags)
* Allocate the pages
*/
m = NULL;
MPASS((flags & M_PKTHDR) == 0);
while (total > 0) {
mb = mb_alloc_ext_pgs(how, (flags & M_PKTHDR),
mb_free_mext_pgs);
mb = mb_alloc_ext_pgs(how, mb_free_mext_pgs);
if (mb == NULL)
goto failed;
if (m == NULL)
@ -1663,7 +1666,7 @@ m_uiotombuf_nomap(struct uio *uio, int how, int len, int maxseg, int flags)
else
prev->m_next = mb;
prev = mb;
pgs = mb->m_ext.ext_pgs;
pgs = &mb->m_ext_pgs;
pgs->flags = MBUF_PEXT_FLAG_ANON;
needed = length = MIN(maxseg, total);
for (i = 0; needed > 0; i++, needed -= PAGE_SIZE) {
@ -1678,7 +1681,7 @@ m_uiotombuf_nomap(struct uio *uio, int how, int len, int maxseg, int flags)
}
}
pg_array[i]->flags &= ~PG_ZERO;
pgs->pa[i] = VM_PAGE_TO_PHYS(pg_array[i]);
pgs->m_epg_pa[i] = VM_PAGE_TO_PHYS(pg_array[i]);
pgs->npgs++;
}
pgs->last_pg_len = length - PAGE_SIZE * (pgs->npgs - 1);
@ -1769,7 +1772,7 @@ m_unmappedtouio(const struct mbuf *m, int m_off, struct uio *uio, int len)
int error, i, off, pglen, pgoff, seglen, segoff;
MBUF_EXT_PGS_ASSERT(m);
ext_pgs = m->m_ext.ext_pgs;
ext_pgs = __DECONST(void *, &m->m_ext_pgs);
error = 0;
/* Skip over any data removed from the front. */
@ -1785,7 +1788,7 @@ m_unmappedtouio(const struct mbuf *m, int m_off, struct uio *uio, int len)
seglen = min(seglen, len);
off = 0;
len -= seglen;
error = uiomove(&ext_pgs->hdr[segoff], seglen, uio);
error = uiomove(&ext_pgs->m_epg_hdr[segoff], seglen, uio);
}
}
pgoff = ext_pgs->first_pg_off;
@ -1801,7 +1804,7 @@ m_unmappedtouio(const struct mbuf *m, int m_off, struct uio *uio, int len)
off = 0;
seglen = min(seglen, len);
len -= seglen;
pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]);
pg = PHYS_TO_VM_PAGE(ext_pgs->m_epg_pa[i]);
error = uiomove_fromphys(&pg, segoff, seglen, uio);
pgoff = 0;
};
@ -1809,7 +1812,7 @@ m_unmappedtouio(const struct mbuf *m, int m_off, struct uio *uio, int len)
KASSERT((off + len) <= ext_pgs->trail_len,
("off + len > trail (%d + %d > %d, m_off = %d)", off, len,
ext_pgs->trail_len, m_off));
error = uiomove(&ext_pgs->trail[off], len, uio);
error = uiomove(&ext_pgs->m_epg_trail[off], len, uio);
}
return (error);
}

View File

@ -131,15 +131,15 @@ sbready_compress(struct sockbuf *sb, struct mbuf *m0, struct mbuf *end)
struct mbuf_ext_pgs *mpgs, *npgs;
int hdr_len, trail_len;
mpgs = m->m_ext.ext_pgs;
npgs = n->m_ext.ext_pgs;
mpgs = &m->m_ext_pgs;
npgs = &n->m_ext_pgs;
hdr_len = npgs->hdr_len;
trail_len = mpgs->trail_len;
if (trail_len != 0 && hdr_len != 0 &&
trail_len + hdr_len <= MBUF_PEXT_TRAIL_LEN) {
/* copy n's header to m's trailer */
memcpy(&mpgs->trail[trail_len], npgs->hdr,
hdr_len);
memcpy(&m->m_epg_trail[trail_len],
n->m_epg_hdr, hdr_len);
mpgs->trail_len += hdr_len;
m->m_len += hdr_len;
npgs->hdr_len = 0;
@ -214,13 +214,13 @@ sbready(struct sockbuf *sb, struct mbuf *m0, int count)
("%s: m %p !M_NOTREADY", __func__, m));
if ((m->m_flags & M_EXT) != 0 &&
m->m_ext.ext_type == EXT_PGS) {
if (count < m->m_ext.ext_pgs->nrdy) {
m->m_ext.ext_pgs->nrdy -= count;
if (count < m->m_ext_pgs.nrdy) {
m->m_ext_pgs.nrdy -= count;
count = 0;
break;
}
count -= m->m_ext.ext_pgs->nrdy;
m->m_ext.ext_pgs->nrdy = 0;
count -= m->m_ext_pgs.nrdy;
m->m_ext_pgs.nrdy = 0;
} else
count--;

View File

@ -231,7 +231,7 @@ ip_output_send(struct inpcb *inp, struct ifnet *ifp, struct mbuf *m,
* dropping the mbuf's reference) in if_output.
*/
if (m->m_next != NULL && mbuf_has_tls_session(m->m_next)) {
tls = ktls_hold(m->m_next->m_ext.ext_pgs->tls);
tls = ktls_hold(m->m_next->m_ext_pgs.tls);
mst = tls->snd_tag;
/*

View File

@ -1908,7 +1908,7 @@ tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen,
pkthdrlen = NULL;
#ifdef KERN_TLS
if (hw_tls && (m->m_flags & M_NOMAP))
tls = m->m_ext.ext_pgs->tls;
tls = m->m_ext_pgs.tls;
else
tls = NULL;
start = m;
@ -1925,7 +1925,7 @@ tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen,
#ifdef KERN_TLS
if (hw_tls) {
if (m->m_flags & M_NOMAP)
ntls = m->m_ext.ext_pgs->tls;
ntls = m->m_ext_pgs.tls;
else
ntls = NULL;

View File

@ -340,7 +340,7 @@ ip6_output_send(struct inpcb *inp, struct ifnet *ifp, struct ifnet *origifp,
* dropping the mbuf's reference) in if_output.
*/
if (m->m_next != NULL && mbuf_has_tls_session(m->m_next)) {
tls = ktls_hold(m->m_next->m_ext.ext_pgs->tls);
tls = ktls_hold(m->m_next->m_ext_pgs.tls);
mst = tls->snd_tag;
/*

View File

@ -202,6 +202,45 @@ struct pkthdr {
#define lro_csum PH_loc.sixteen[1] /* inbound during LRO (no reassembly) */
/* Note PH_loc is used during IP reassembly (all 8 bytes as a ptr) */
/*
* TLS records for TLS 1.0-1.2 can have the following header lengths:
* - 5 (AES-CBC with implicit IV)
* - 21 (AES-CBC with explicit IV)
* - 13 (AES-GCM with 8 byte explicit IV)
*/
#define MBUF_PEXT_HDR_LEN 23
/*
* TLS records for TLS 1.0-1.2 can have the following maximum trailer
* lengths:
* - 16 (AES-GCM)
* - 36 (AES-CBC with SHA1 and up to 16 bytes of padding)
* - 48 (AES-CBC with SHA2-256 and up to 16 bytes of padding)
* - 64 (AES-CBC with SHA2-384 and up to 16 bytes of padding)
*/
#define MBUF_PEXT_TRAIL_LEN 64
#if defined(__LP64__)
#define MBUF_PEXT_MAX_PGS (40 / sizeof(vm_paddr_t))
#else
#define MBUF_PEXT_MAX_PGS (72 / sizeof(vm_paddr_t))
#endif
#define MBUF_PEXT_MAX_BYTES \
(MBUF_PEXT_MAX_PGS * PAGE_SIZE + MBUF_PEXT_HDR_LEN + MBUF_PEXT_TRAIL_LEN)
#define MBUF_PEXT_FLAG_ANON 1 /* Data can be encrypted in place. */
struct mbuf_ext_pgs_data {
vm_paddr_t pa[MBUF_PEXT_MAX_PGS]; /* phys addrs of pgs */
char trail[MBUF_PEXT_TRAIL_LEN]; /* TLS trailer */
char hdr[MBUF_PEXT_HDR_LEN]; /* TLS header */
};
struct ktls_session;
struct socket;
/*
* Description of external storage mapped into mbuf; valid only if M_EXT is
* set.
@ -224,18 +263,10 @@ struct m_ext {
volatile u_int ext_count;
volatile u_int *ext_cnt;
};
union {
/*
* If ext_type == EXT_PGS, 'ext_pgs' points to a
* structure describing the buffer. Otherwise,
* 'ext_buf' points to the start of the buffer.
*/
struct mbuf_ext_pgs *ext_pgs;
char *ext_buf;
};
uint32_t ext_size; /* size of buffer, for ext_free */
uint32_t ext_type:8, /* type of external storage */
ext_flags:24; /* external storage mbuf flags */
char *ext_buf; /* start of buffer */
/*
* Fields below store the free context for the external storage.
* They are valid only in the refcount carrying mbuf, the one with
@ -246,9 +277,38 @@ struct m_ext {
#define m_ext_copylen offsetof(struct m_ext, ext_free)
m_ext_free_t *ext_free; /* free routine if not the usual */
void *ext_arg1; /* optional argument pointer */
void *ext_arg2; /* optional argument pointer */
union {
void *ext_arg2; /* optional argument pointer */
struct mbuf_ext_pgs_data ext_pgs;
};
};
struct mbuf_ext_pgs {
uint8_t npgs; /* Number of attached pages */
uint8_t nrdy; /* Pages with I/O pending */
uint8_t hdr_len; /* TLS header length */
uint8_t trail_len; /* TLS trailer length */
uint16_t first_pg_off; /* Offset into 1st page */
uint16_t last_pg_len; /* Length of last page */
uint8_t flags; /* Flags */
uint8_t record_type;
uint8_t spare[2];
int enc_cnt;
struct ktls_session *tls; /* TLS session */
struct socket *so;
uint64_t seqno;
struct mbuf *mbuf;
STAILQ_ENTRY(mbuf_ext_pgs) stailq;
#if !defined(__LP64__)
uint8_t pad[8]; /* pad to size of pkthdr */
#endif
struct m_ext m_ext;
};
#define m_epg_hdr m_ext.ext_pgs.hdr
#define m_epg_trail m_ext.ext_pgs.trail
#define m_epg_pa m_ext.ext_pgs.pa
/*
* The core of the mbuf object along with some shortcut defines for practical
* purposes.
@ -287,86 +347,20 @@ struct mbuf {
* order to support future work on variable-size mbufs.
*/
union {
struct {
struct pkthdr m_pkthdr; /* M_PKTHDR set */
union {
struct m_ext m_ext; /* M_EXT set */
char m_pktdat[0];
union {
struct {
struct pkthdr m_pkthdr; /* M_PKTHDR set */
union {
struct m_ext m_ext; /* M_EXT set */
char m_pktdat[0];
};
};
struct mbuf_ext_pgs m_ext_pgs;
};
char m_dat[0]; /* !M_PKTHDR, !M_EXT */
};
};
struct ktls_session;
struct socket;
/*
* TLS records for TLS 1.0-1.2 can have the following header lengths:
* - 5 (AES-CBC with implicit IV)
* - 21 (AES-CBC with explicit IV)
* - 13 (AES-GCM with 8 byte explicit IV)
*/
#define MBUF_PEXT_HDR_LEN 23
/*
* TLS records for TLS 1.0-1.2 can have the following maximum trailer
* lengths:
* - 16 (AES-GCM)
* - 36 (AES-CBC with SHA1 and up to 16 bytes of padding)
* - 48 (AES-CBC with SHA2-256 and up to 16 bytes of padding)
* - 64 (AES-CBC with SHA2-384 and up to 16 bytes of padding)
*/
#define MBUF_PEXT_TRAIL_LEN 64
#ifdef __LP64__
#define MBUF_PEXT_MAX_PGS (152 / sizeof(vm_paddr_t))
#else
#define MBUF_PEXT_MAX_PGS (156 / sizeof(vm_paddr_t))
#endif
#define MBUF_PEXT_MAX_BYTES \
(MBUF_PEXT_MAX_PGS * PAGE_SIZE + MBUF_PEXT_HDR_LEN + MBUF_PEXT_TRAIL_LEN)
#define MBUF_PEXT_FLAG_ANON 1 /* Data can be encrypted in place. */
/*
* This struct is 256 bytes in size and is arranged so that the most
* common case (accessing the first 4 pages of a 16KB TLS record) will
* fit in a single 64 byte cacheline.
*/
struct mbuf_ext_pgs {
uint8_t npgs; /* Number of attached pages */
uint8_t nrdy; /* Pages with I/O pending */
uint8_t hdr_len; /* TLS header length */
uint8_t trail_len; /* TLS trailer length */
uint16_t first_pg_off; /* Offset into 1st page */
uint16_t last_pg_len; /* Length of last page */
vm_paddr_t pa[MBUF_PEXT_MAX_PGS]; /* phys addrs of pages */
char hdr[MBUF_PEXT_HDR_LEN]; /* TLS header */
uint8_t flags; /* Flags */
struct ktls_session *tls; /* TLS session */
#if defined(__i386__) || \
(defined(__powerpc__) && !defined(__powerpc64__) && defined(BOOKE))
/*
* i386 and Book-E PowerPC have 64-bit vm_paddr_t, so there is
* a 4 byte remainder from the space allocated for pa[].
*/
uint32_t pad;
#endif
union {
char trail[MBUF_PEXT_TRAIL_LEN]; /* TLS trailer */
struct {
uint8_t record_type; /* Must be first */
struct socket *so;
struct mbuf *mbuf;
uint64_t seqno;
STAILQ_ENTRY(mbuf_ext_pgs) stailq;
int enc_cnt;
};
};
};
#ifdef _KERNEL
static inline int
mbuf_ext_pg_len(struct mbuf_ext_pgs *ext_pgs, int pidx, int pgoff)
@ -699,7 +693,7 @@ extern uma_zone_t zone_extpgs;
void mb_dupcl(struct mbuf *, struct mbuf *);
void mb_free_ext(struct mbuf *);
void mb_free_mext_pgs(struct mbuf *);
struct mbuf *mb_alloc_ext_pgs(int, bool, m_ext_free_t);
struct mbuf *mb_alloc_ext_pgs(int, m_ext_free_t);
int mb_unmapped_compress(struct mbuf *m);
struct mbuf *mb_unmapped_to_ext(struct mbuf *m);
void mb_free_notready(struct mbuf *m, int count);
@ -1515,7 +1509,7 @@ mbuf_has_tls_session(struct mbuf *m)
if (m->m_flags & M_NOMAP) {
MBUF_EXT_PGS_ASSERT(m);
if (m->m_ext.ext_pgs->tls != NULL) {
if (m->m_ext_pgs.tls != NULL) {
return (true);
}
}