cxgbei: Support DDP for target I/O S/G lists with more than one entry.

A CAM target layer I/O CCB can use an S/G list of virtual address ranges
to describe its data buffer.  This change adds zero-copy receive support
for such requests.

Sponsored by:	Chelsio Communications
Differential Revision:	https://reviews.freebsd.org/D29908
John Baldwin, 2021-05-14 12:17:06 -07:00
commit 46bee8043e (parent 23b209ee88)
3 changed files with 232 additions and 14 deletions
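Note: the DDP path added below only engages when the S/G list satisfies the alignment rules enforced by the new ddp_sgl_check(): every entry except the last must start on a page boundary and have a length that is a multiple of PAGE_SIZE, and the last entry's address must be 4-byte aligned. A minimal userland sketch of that rule follows (the sg_entry type and the 4K PAGE_SIZE are assumptions standing in for struct ctl_sg_entry and the kernel's page size):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define	PAGE_SIZE	4096			/* assumed 4K pages for the example */
#define	PAGE_MASK	(PAGE_SIZE - 1)

struct sg_entry {				/* stand-in for struct ctl_sg_entry */
	void	*addr;
	size_t	 len;
};

/*
 * Same rule as ddp_sgl_check(): interior entries must be page-aligned
 * and a multiple of PAGE_SIZE long; the last entry only needs 4-byte
 * address alignment.
 */
static bool
sgl_ok_for_ddp(const struct sg_entry *sg, int entries)
{
	for (int i = 0; i < entries - 1; i++) {
		if (((uintptr_t)sg[i].addr & PAGE_MASK) != 0 ||
		    (sg[i].len % PAGE_SIZE) != 0)
			return (false);
	}
	return (((uintptr_t)sg[entries - 1].addr & 3) == 0);
}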

sys/dev/cxgbe/cxgbei/icl_cxgbei.c

@@ -873,6 +873,28 @@ icl_cxgbei_conn_task_done(struct icl_conn *ic, void *arg)
	}
}

static inline bool
ddp_sgl_check(struct ctl_sg_entry *sg, int entries, int xferlen)
{
	int total_len = 0;

	MPASS(entries > 0);
	if (((vm_offset_t)sg[--entries].addr & 3U) != 0)
		return (false);

	total_len += sg[entries].len;
	while (--entries >= 0) {
		if (((vm_offset_t)sg[entries].addr & PAGE_MASK) != 0 ||
		    (sg[entries].len % PAGE_SIZE) != 0)
			return (false);
		total_len += sg[entries].len;
	}

	MPASS(total_len == xferlen);
	return (true);
}

/* XXXNP: PDU should be passed in as parameter, like on the initiator. */
#define io_to_request_pdu(io) ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr)
#define io_to_ppod_reservation(io) ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND2].ptr)
@@ -888,6 +910,8 @@ icl_cxgbei_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io,
	struct cxgbei_data *ci = sc->iscsi_ulp_softc;
	struct ppod_region *pr = &ci->pr;
	struct ppod_reservation *prsv;
	struct ctl_sg_entry *sgl, sg_entry;
	int sg_entries = ctsio->kern_sg_entries;
	uint32_t ttt;
	int xferlen, rc = 0, alias;
@@ -898,7 +922,6 @@ icl_cxgbei_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io,
	if (ctsio->ext_data_filled == 0) {
		int first_burst;
		struct icl_pdu *ip = io_to_request_pdu(io);
-		vm_offset_t buf;
#ifdef INVARIANTS
		struct icl_cxgbei_pdu *icp = ip_to_icp(ip);
@@ -931,18 +954,16 @@ icl_cxgbei_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io,
			return (0);
		}

-		if (ctsio->kern_sg_entries == 0)
-			buf = (vm_offset_t)ctsio->kern_data_ptr;
-		else if (ctsio->kern_sg_entries == 1) {
-			struct ctl_sg_entry *sgl = (void *)ctsio->kern_data_ptr;
+		if (sg_entries == 0) {
+			sgl = &sg_entry;
+			sgl->len = xferlen;
+			sgl->addr = (void *)ctsio->kern_data_ptr;
+			sg_entries = 1;
+		} else
+			sgl = (void *)ctsio->kern_data_ptr;

-			MPASS(sgl->len == xferlen);
-			buf = (vm_offset_t)sgl->addr;
-		} else {
-			rc = EAGAIN;	/* XXX implement */
+		if (!ddp_sgl_check(sgl, sg_entries, xferlen))
			goto no_ddp;
-		}

		/*
		 * Reserve resources for DDP, update the ttt that should be used
@@ -956,14 +977,15 @@ icl_cxgbei_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io,
			goto no_ddp;
		}

-		rc = t4_alloc_page_pods_for_buf(pr, buf, xferlen, prsv);
+		rc = t4_alloc_page_pods_for_sgl(pr, sgl, sg_entries, prsv);
		if (rc != 0) {
			uma_zfree(prsv_zone, prsv);
			goto no_ddp;
		}

-		rc = t4_write_page_pods_for_buf(sc, toep, prsv, buf, xferlen);
-		if (rc != 0) {
+		rc = t4_write_page_pods_for_sgl(sc, toep, prsv, sgl, sg_entries,
+		    xferlen);
+		if (__predict_false(rc != 0)) {
			t4_free_page_pods(prsv);
			uma_zfree(prsv_zone, prsv);
			goto no_ddp;

sys/dev/cxgbe/tom/t4_ddp.c

@@ -62,6 +62,9 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <cam/scsi/scsi_all.h>
#include <cam/ctl/ctl_io.h>
#ifdef TCP_OFFLOAD
#include "common/common.h"
#include "common/t4_msg.h"
@@ -981,6 +984,76 @@ t4_alloc_page_pods_for_buf(struct ppod_region *pr, vm_offset_t buf, int len,
	return (0);
}

int
t4_alloc_page_pods_for_sgl(struct ppod_region *pr, struct ctl_sg_entry *sgl,
    int entries, struct ppod_reservation *prsv)
{
	int hcf, seglen, idx = 0, npages, nppods, i, len;
	uintptr_t start_pva, end_pva, pva, p1;
	vm_offset_t buf;
	struct ctl_sg_entry *sge;

	MPASS(entries > 0);
	MPASS(sgl);

	/*
	 * The DDP page size is unrelated to the VM page size. We combine
	 * contiguous physical pages into larger segments to get the best DDP
	 * page size possible. This is the largest of the four sizes in
	 * A_ULP_RX_ISCSI_PSZ that evenly divides the HCF of the segment sizes
	 * in the page list.
	 */
	hcf = 0;
	for (i = entries - 1; i >= 0; i--) {
		sge = sgl + i;
		buf = (vm_offset_t)sge->addr;
		len = sge->len;
		start_pva = trunc_page(buf);
		end_pva = trunc_page(buf + len - 1);
		pva = start_pva;
		while (pva <= end_pva) {
			seglen = PAGE_SIZE;
			p1 = pmap_kextract(pva);
			pva += PAGE_SIZE;
			while (pva <= end_pva && p1 + seglen ==
			    pmap_kextract(pva)) {
				seglen += PAGE_SIZE;
				pva += PAGE_SIZE;
			}

			hcf = calculate_hcf(hcf, seglen);
			if (hcf < (1 << pr->pr_page_shift[1])) {
				idx = 0;
				goto have_pgsz;	/* give up, short circuit */
			}
		}
	}

#define PR_PAGE_MASK(x) ((1 << pr->pr_page_shift[(x)]) - 1)
	MPASS((hcf & PR_PAGE_MASK(0)) == 0); /* PAGE_SIZE is >= 4K everywhere */
	for (idx = nitems(pr->pr_page_shift) - 1; idx > 0; idx--) {
		if ((hcf & PR_PAGE_MASK(idx)) == 0)
			break;
	}
#undef PR_PAGE_MASK

have_pgsz:
	MPASS(idx <= M_PPOD_PGSZ);

	npages = 0;
	while (entries--) {
		npages++;
		start_pva = trunc_page((vm_offset_t)sgl->addr);
		end_pva = trunc_page((vm_offset_t)sgl->addr + sgl->len - 1);
		npages += (end_pva - start_pva) >> pr->pr_page_shift[idx];
		sgl = sgl + 1;
	}

	nppods = howmany(npages, PPOD_PAGES);
	if (alloc_page_pods(pr, nppods, idx, prsv) != 0)
		return (ENOMEM);
	MPASS(prsv->prsv_nppods > 0);

	return (0);
}

void
t4_free_page_pods(struct ppod_reservation *prsv)
{
@@ -1197,6 +1270,124 @@ t4_write_page_pods_for_buf(struct adapter *sc, struct toepcb *toep,
	return (0);
}

int
t4_write_page_pods_for_sgl(struct adapter *sc, struct toepcb *toep,
    struct ppod_reservation *prsv, struct ctl_sg_entry *sgl, int entries,
    int xferlen)
{
	struct inpcb *inp = toep->inp;
	struct ulp_mem_io *ulpmc;
	struct ulptx_idata *ulpsc;
	struct pagepod *ppod;
	int i, j, k, n, chunk, len, ddp_pgsz;
	u_int ppod_addr, offset, sg_offset = 0;
	uint32_t cmd;
	struct ppod_region *pr = prsv->prsv_pr;
	uintptr_t pva, pa;
	struct mbuf *m;
	struct mbufq wrq;

	MPASS(sgl != NULL);
	MPASS(entries > 0);

	cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE));
	if (is_t4(sc))
		cmd |= htobe32(F_ULP_MEMIO_ORDER);
	else
		cmd |= htobe32(F_T5_ULP_MEMIO_IMM);
	ddp_pgsz = 1 << pr->pr_page_shift[G_PPOD_PGSZ(prsv->prsv_tag)];
	offset = (vm_offset_t)sgl->addr & PAGE_MASK;
	ppod_addr = pr->pr_start + (prsv->prsv_tag & pr->pr_tag_mask);
	pva = trunc_page((vm_offset_t)sgl->addr);
	mbufq_init(&wrq, INT_MAX);
	for (i = 0; i < prsv->prsv_nppods; ppod_addr += chunk) {

		/* How many page pods are we writing in this cycle */
		n = min(prsv->prsv_nppods - i, NUM_ULP_TX_SC_IMM_PPODS);
		MPASS(n > 0);
		chunk = PPOD_SZ(n);
		len = roundup2(sizeof(*ulpmc) + sizeof(*ulpsc) + chunk, 16);

		m = alloc_raw_wr_mbuf(len);
		if (m == NULL) {
			mbufq_drain(&wrq);
			return (ENOMEM);
		}
		ulpmc = mtod(m, struct ulp_mem_io *);

		INIT_ULPTX_WR(ulpmc, len, 0, toep->tid);
		ulpmc->cmd = cmd;
		ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk / 32));
		ulpmc->len16 = htobe32(howmany(len - sizeof(ulpmc->wr), 16));
		ulpmc->lock_addr = htobe32(V_ULP_MEMIO_ADDR(ppod_addr >> 5));

		ulpsc = (struct ulptx_idata *)(ulpmc + 1);
		ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM));
		ulpsc->len = htobe32(chunk);

		ppod = (struct pagepod *)(ulpsc + 1);
		for (j = 0; j < n; i++, j++, ppod++) {
			ppod->vld_tid_pgsz_tag_color = htobe64(F_PPOD_VALID |
			    V_PPOD_TID(toep->tid) |
			    (prsv->prsv_tag & ~V_PPOD_PGSZ(M_PPOD_PGSZ)));
			ppod->len_offset = htobe64(V_PPOD_LEN(xferlen) |
			    V_PPOD_OFST(offset));
			ppod->rsvd = 0;

			for (k = 0; k < nitems(ppod->addr); k++) {
				if (entries != 0) {
					pa = pmap_kextract(pva + sg_offset);
					ppod->addr[k] = htobe64(pa);
				} else
					ppod->addr[k] = 0;

#if 0
				CTR5(KTR_CXGBE,
				    "%s: tid %d ppod[%d]->addr[%d] = %p",
				    __func__, toep->tid, i, k,
				    htobe64(ppod->addr[k]));
#endif

				/*
				 * If this is the last entry in a pod,
				 * reuse the same entry for first address
				 * in the next pod.
				 */
				if (k + 1 == nitems(ppod->addr))
					break;

				/*
				 * Don't move to the next DDP page if the
				 * sgl is already finished.
				 */
				if (entries == 0)
					continue;

				sg_offset += ddp_pgsz;
				if (sg_offset == sgl->len) {
					/*
					 * This sgl entry is done. Go
					 * to the next.
					 */
					entries--;
					sgl++;
					sg_offset = 0;
					if (entries != 0)
						pva = trunc_page(
						    (vm_offset_t)sgl->addr);
				}
			}
		}

		mbufq_enqueue(&wrq, m);
	}

	INP_WLOCK(inp);
	mbufq_concat(&toep->ulp_pduq, &wrq);
	INP_WUNLOCK(inp);

	return (0);
}

/*
 * Prepare a pageset for DDP. This sets up page pods.
 */

sys/dev/cxgbe/tom/t4_tom.h

@@ -88,6 +88,7 @@ enum {
	DDP_DEAD	= (1 << 6),	/* toepcb is shutting down */
};

struct ctl_sg_entry;
struct sockopt;
struct offload_settings;
@@ -437,10 +438,14 @@ void t4_free_ppod_region(struct ppod_region *);
int t4_alloc_page_pods_for_ps(struct ppod_region *, struct pageset *);
int t4_alloc_page_pods_for_buf(struct ppod_region *, vm_offset_t, int,
    struct ppod_reservation *);
int t4_alloc_page_pods_for_sgl(struct ppod_region *, struct ctl_sg_entry *, int,
    struct ppod_reservation *);
int t4_write_page_pods_for_ps(struct adapter *, struct sge_wrq *, int,
    struct pageset *);
int t4_write_page_pods_for_buf(struct adapter *, struct toepcb *,
    struct ppod_reservation *, vm_offset_t, int);
int t4_write_page_pods_for_sgl(struct adapter *, struct toepcb *,
    struct ppod_reservation *, struct ctl_sg_entry *, int, int);
void t4_free_page_pods(struct ppod_reservation *);
int t4_soreceive_ddp(struct socket *, struct sockaddr **, struct uio *,
    struct mbuf **, struct mbuf **, int *);