unix/dgram: reduce mbuf chain traversals in send(2) and recv(2)
o Use m_pkthdr.memlen from m_uiotombuf()
o Modify unp_internalize() to keep track of allocated space and memory
  as well as pointer to the last buffer.
o Modify unp_addsockcred() to keep track of allocated space and memory
  as well as pointer to the last buffer.
o Record the datagram len/memlen/ctllen in the first (from) mbuf of the
  chain in uipc_sosend_dgram() and reuse it in uipc_soreceive_dgram().

Reviewed by: markj
Differential revision: https://reviews.freebsd.org/D35302
This commit is contained in:
parent
9b841b0e23
commit
1093f16487
@ -302,11 +302,13 @@ static void unp_gc(__unused void *, int);
|
|||||||
static void unp_scan(struct mbuf *, void (*)(struct filedescent **, int));
|
static void unp_scan(struct mbuf *, void (*)(struct filedescent **, int));
|
||||||
static void unp_discard(struct file *);
|
static void unp_discard(struct file *);
|
||||||
static void unp_freerights(struct filedescent **, int);
|
static void unp_freerights(struct filedescent **, int);
|
||||||
static int unp_internalize(struct mbuf **, struct thread *);
|
static int unp_internalize(struct mbuf **, struct thread *,
|
||||||
|
struct mbuf **, u_int *, u_int *);
|
||||||
static void unp_internalize_fp(struct file *);
|
static void unp_internalize_fp(struct file *);
|
||||||
static int unp_externalize(struct mbuf *, struct mbuf **, int);
|
static int unp_externalize(struct mbuf *, struct mbuf **, int);
|
||||||
static int unp_externalize_fp(struct file *);
|
static int unp_externalize_fp(struct file *);
|
||||||
static struct mbuf *unp_addsockcred(struct thread *, struct mbuf *, int);
|
static struct mbuf *unp_addsockcred(struct thread *, struct mbuf *,
|
||||||
|
int, struct mbuf **, u_int *, u_int *);
|
||||||
static void unp_process_defers(void * __unused, int);
|
static void unp_process_defers(void * __unused, int);
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -1014,7 +1016,8 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
|
|||||||
error = EOPNOTSUPP;
|
error = EOPNOTSUPP;
|
||||||
goto release;
|
goto release;
|
||||||
}
|
}
|
||||||
if (control != NULL && (error = unp_internalize(&control, td)))
|
if (control != NULL &&
|
||||||
|
(error = unp_internalize(&control, td, NULL, NULL, NULL)))
|
||||||
goto release;
|
goto release;
|
||||||
|
|
||||||
unp2 = NULL;
|
unp2 = NULL;
|
||||||
@ -1051,7 +1054,8 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
|
|||||||
* SOCK_SEQPACKET (LOCAL_CREDS => WANTCRED_ONESHOT), or
|
* SOCK_SEQPACKET (LOCAL_CREDS => WANTCRED_ONESHOT), or
|
||||||
* forever (LOCAL_CREDS_PERSISTENT => WANTCRED_ALWAYS).
|
* forever (LOCAL_CREDS_PERSISTENT => WANTCRED_ALWAYS).
|
||||||
*/
|
*/
|
||||||
control = unp_addsockcred(td, control, unp2->unp_flags);
|
control = unp_addsockcred(td, control, unp2->unp_flags, NULL,
|
||||||
|
NULL, NULL);
|
||||||
unp2->unp_flags &= ~UNP_WANTCRED_ONESHOT;
|
unp2->unp_flags &= ~UNP_WANTCRED_ONESHOT;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1131,7 +1135,13 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
|
|||||||
*
|
*
|
||||||
* Allocate a record consisting of 3 mbufs in the sequence of
|
* Allocate a record consisting of 3 mbufs in the sequence of
|
||||||
* from -> control -> data and append it to the socket buffer.
|
* from -> control -> data and append it to the socket buffer.
|
||||||
|
*
|
||||||
|
* The first mbuf carries sender's name and is a pkthdr that stores
|
||||||
|
* overall length of datagram, its memory consumption and control length.
|
||||||
*/
|
*/
|
||||||
|
#define ctllen PH_loc.thirtytwo[1]
|
||||||
|
_Static_assert(offsetof(struct pkthdr, memlen) + sizeof(u_int) <=
|
||||||
|
offsetof(struct pkthdr, ctllen), "unix/dgram can not store ctllen");
|
||||||
static int
|
static int
|
||||||
uipc_sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
|
uipc_sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
|
||||||
struct mbuf *m, struct mbuf *c, int flags, struct thread *td)
|
struct mbuf *m, struct mbuf *c, int flags, struct thread *td)
|
||||||
@ -1140,14 +1150,16 @@ uipc_sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
|
|||||||
const struct sockaddr *from;
|
const struct sockaddr *from;
|
||||||
struct socket *so2;
|
struct socket *so2;
|
||||||
struct sockbuf *sb;
|
struct sockbuf *sb;
|
||||||
struct mbuf *f;
|
struct mbuf *f, *clast;
|
||||||
u_int cc;
|
u_int cc, ctl, mbcnt;
|
||||||
|
u_int dcc __diagused, dctl __diagused, dmbcnt __diagused;
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
MPASS((uio != NULL && m == NULL) || (m != NULL && uio == NULL));
|
MPASS((uio != NULL && m == NULL) || (m != NULL && uio == NULL));
|
||||||
|
|
||||||
error = 0;
|
error = 0;
|
||||||
f = NULL;
|
f = NULL;
|
||||||
|
ctl = 0;
|
||||||
|
|
||||||
if (__predict_false(flags & MSG_OOB)) {
|
if (__predict_false(flags & MSG_OOB)) {
|
||||||
error = EOPNOTSUPP;
|
error = EOPNOTSUPP;
|
||||||
@ -1163,8 +1175,11 @@ uipc_sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
|
|||||||
error = EFAULT;
|
error = EFAULT;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
f = m_get(M_WAITOK, MT_SONAME);
|
f = m_gethdr(M_WAITOK, MT_SONAME);
|
||||||
if (c != NULL && (error = unp_internalize(&c, td)))
|
cc = m->m_pkthdr.len;
|
||||||
|
mbcnt = MSIZE + m->m_pkthdr.memlen;
|
||||||
|
if (c != NULL &&
|
||||||
|
(error = unp_internalize(&c, td, &clast, &ctl, &mbcnt)))
|
||||||
goto out;
|
goto out;
|
||||||
} else {
|
} else {
|
||||||
/* pru_sosend() with mbuf usually is a kernel thread. */
|
/* pru_sosend() with mbuf usually is a kernel thread. */
|
||||||
@ -1177,7 +1192,7 @@ uipc_sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
|
|||||||
error = EMSGSIZE;
|
error = EMSGSIZE;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
if ((f = m_get(M_NOWAIT, MT_SONAME)) == NULL) {
|
if ((f = m_gethdr(M_NOWAIT, MT_SONAME)) == NULL) {
|
||||||
error = ENOBUFS;
|
error = ENOBUFS;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
@ -1189,6 +1204,14 @@ uipc_sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
|
|||||||
m->m_pkthdr.csum_flags = 0;
|
m->m_pkthdr.csum_flags = 0;
|
||||||
m->m_pkthdr.fibnum = 0;
|
m->m_pkthdr.fibnum = 0;
|
||||||
m->m_pkthdr.rsstype = 0;
|
m->m_pkthdr.rsstype = 0;
|
||||||
|
|
||||||
|
cc = m->m_pkthdr.len;
|
||||||
|
mbcnt = MSIZE;
|
||||||
|
for (struct mbuf *mb = m; mb != NULL; mb = mb->m_next) {
|
||||||
|
mbcnt += MSIZE;
|
||||||
|
if (mb->m_flags & M_EXT)
|
||||||
|
mbcnt += mb->m_ext.ext_size;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
unp = sotounpcb(so);
|
unp = sotounpcb(so);
|
||||||
@ -1240,7 +1263,8 @@ uipc_sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (unp2->unp_flags & UNP_WANTCRED_MASK)
|
if (unp2->unp_flags & UNP_WANTCRED_MASK)
|
||||||
c = unp_addsockcred(td, c, unp2->unp_flags);
|
c = unp_addsockcred(td, c, unp2->unp_flags, &clast, &ctl,
|
||||||
|
&mbcnt);
|
||||||
if (unp->unp_addr != NULL)
|
if (unp->unp_addr != NULL)
|
||||||
from = (struct sockaddr *)unp->unp_addr;
|
from = (struct sockaddr *)unp->unp_addr;
|
||||||
else
|
else
|
||||||
@ -1248,36 +1272,55 @@ uipc_sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
|
|||||||
f->m_len = from->sa_len;
|
f->m_len = from->sa_len;
|
||||||
MPASS(from->sa_len <= MLEN);
|
MPASS(from->sa_len <= MLEN);
|
||||||
bcopy(from, mtod(f, void *), from->sa_len);
|
bcopy(from, mtod(f, void *), from->sa_len);
|
||||||
cc = f->m_len + m->m_pkthdr.len;
|
ctl += f->m_len;
|
||||||
|
|
||||||
/* Concatenate: from -> control -> data. */
|
/*
|
||||||
|
* Concatenate mbufs: from -> control -> data.
|
||||||
|
* Save overall cc and mbcnt in "from" mbuf.
|
||||||
|
*/
|
||||||
if (c != NULL) {
|
if (c != NULL) {
|
||||||
struct mbuf *clast;
|
#ifdef INVARIANTS
|
||||||
|
struct mbuf *mc;
|
||||||
|
|
||||||
cc += m_length(c, &clast);
|
for (mc = c; mc->m_next != NULL; mc = mc->m_next);
|
||||||
|
MPASS(mc == clast);
|
||||||
|
#endif
|
||||||
f->m_next = c;
|
f->m_next = c;
|
||||||
clast->m_next = m;
|
clast->m_next = m;
|
||||||
c = NULL;
|
c = NULL;
|
||||||
} else
|
} else
|
||||||
f->m_next = m;
|
f->m_next = m;
|
||||||
m = NULL;
|
m = NULL;
|
||||||
|
#ifdef INVARIANTS
|
||||||
|
dcc = dctl = dmbcnt = 0;
|
||||||
|
for (struct mbuf *mb = f; mb != NULL; mb = mb->m_next) {
|
||||||
|
if (mb->m_type == MT_DATA)
|
||||||
|
dcc += mb->m_len;
|
||||||
|
else
|
||||||
|
dctl += mb->m_len;
|
||||||
|
dmbcnt += MSIZE;
|
||||||
|
if (mb->m_flags & M_EXT)
|
||||||
|
dmbcnt += mb->m_ext.ext_size;
|
||||||
|
}
|
||||||
|
MPASS(dcc == cc);
|
||||||
|
MPASS(dctl == ctl);
|
||||||
|
MPASS(dmbcnt == mbcnt);
|
||||||
|
#endif
|
||||||
|
f->m_pkthdr.len = cc + ctl;
|
||||||
|
f->m_pkthdr.memlen = mbcnt;
|
||||||
|
f->m_pkthdr.ctllen = ctl;
|
||||||
|
|
||||||
so2 = unp2->unp_socket;
|
so2 = unp2->unp_socket;
|
||||||
sb = &so2->so_rcv;
|
sb = &so2->so_rcv;
|
||||||
SOCK_RECVBUF_LOCK(so2);
|
SOCK_RECVBUF_LOCK(so2);
|
||||||
if (cc <= sbspace(sb)) {
|
if (cc <= sbspace(sb)) {
|
||||||
STAILQ_INSERT_TAIL(&sb->uxdg_mb, f, m_stailqpkt);
|
STAILQ_INSERT_TAIL(&sb->uxdg_mb, f, m_stailqpkt);
|
||||||
/* XXX: would be nice if m_uiotombuf() returns count. */
|
sb->sb_acc += cc + ctl;
|
||||||
for (; f != NULL; f = f->m_next) {
|
sb->sb_ccc += cc + ctl;
|
||||||
if (f->m_type != MT_DATA)
|
sb->sb_ctl += ctl;
|
||||||
sb->sb_ctl += f->m_len;
|
sb->sb_mbcnt += mbcnt;
|
||||||
sb->sb_mbcnt += MSIZE;
|
|
||||||
if (f->m_flags & M_EXT)
|
|
||||||
sb->sb_mbcnt += f->m_ext.ext_size;
|
|
||||||
}
|
|
||||||
sb->sb_acc += cc;
|
|
||||||
sb->sb_ccc += cc;
|
|
||||||
sorwakeup_locked(so2);
|
sorwakeup_locked(so2);
|
||||||
|
f = NULL;
|
||||||
} else {
|
} else {
|
||||||
soroverflow_locked(so2);
|
soroverflow_locked(so2);
|
||||||
error = (so->so_state & SS_NBIO) ? EAGAIN : ENOBUFS;
|
error = (so->so_state & SS_NBIO) ? EAGAIN : ENOBUFS;
|
||||||
@ -1366,7 +1409,7 @@ static int
|
|||||||
uipc_soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio,
|
uipc_soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio,
|
||||||
struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
|
struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
|
||||||
{
|
{
|
||||||
struct mbuf *m, *m2;
|
struct mbuf *m;
|
||||||
int flags, error;
|
int flags, error;
|
||||||
ssize_t len;
|
ssize_t len;
|
||||||
bool nonblock;
|
bool nonblock;
|
||||||
@ -1419,7 +1462,9 @@ uipc_soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio,
|
|||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
SOCK_RECVBUF_LOCK_ASSERT(so);
|
|
||||||
|
M_ASSERTPKTHDR(m);
|
||||||
|
KASSERT(m->m_type == MT_SONAME, ("m->m_type == %d", m->m_type));
|
||||||
|
|
||||||
if (uio->uio_td)
|
if (uio->uio_td)
|
||||||
uio->uio_td->td_ru.ru_msgrcv++;
|
uio->uio_td->td_ru.ru_msgrcv++;
|
||||||
@ -1428,18 +1473,12 @@ uipc_soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio,
|
|||||||
return (uipc_peek_dgram(so, psa, uio, controlp, flagsp));
|
return (uipc_peek_dgram(so, psa, uio, controlp, flagsp));
|
||||||
|
|
||||||
STAILQ_REMOVE_HEAD(&so->so_rcv.uxdg_mb, m_stailqpkt);
|
STAILQ_REMOVE_HEAD(&so->so_rcv.uxdg_mb, m_stailqpkt);
|
||||||
for (m2 = m; m2 != NULL; m2 = m2->m_next) {
|
so->so_rcv.sb_acc -= m->m_pkthdr.len;
|
||||||
if (m2->m_type != MT_DATA)
|
so->so_rcv.sb_ccc -= m->m_pkthdr.len;
|
||||||
so->so_rcv.sb_ctl -= m2->m_len;
|
so->so_rcv.sb_ctl -= m->m_pkthdr.ctllen;
|
||||||
so->so_rcv.sb_acc -= m2->m_len;
|
so->so_rcv.sb_mbcnt -= m->m_pkthdr.memlen;
|
||||||
so->so_rcv.sb_ccc -= m2->m_len;
|
|
||||||
so->so_rcv.sb_mbcnt -= MSIZE;
|
|
||||||
if (m2->m_flags & M_EXT)
|
|
||||||
so->so_rcv.sb_mbcnt -= m2->m_ext.ext_size;
|
|
||||||
}
|
|
||||||
SOCK_RECVBUF_UNLOCK(so);
|
SOCK_RECVBUF_UNLOCK(so);
|
||||||
|
|
||||||
KASSERT(m->m_type == MT_SONAME, ("m->m_type == %d", m->m_type));
|
|
||||||
if (psa != NULL)
|
if (psa != NULL)
|
||||||
*psa = sodupsockaddr(mtod(m, struct sockaddr *), M_WAITOK);
|
*psa = sodupsockaddr(mtod(m, struct sockaddr *), M_WAITOK);
|
||||||
m = m_free(m);
|
m = m_free(m);
|
||||||
@ -2510,7 +2549,8 @@ unp_internalize_cleanup_rights(struct mbuf *control)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
unp_internalize(struct mbuf **controlp, struct thread *td)
|
unp_internalize(struct mbuf **controlp, struct thread *td,
|
||||||
|
struct mbuf **clast, u_int *space, u_int *mbcnt)
|
||||||
{
|
{
|
||||||
struct mbuf *control, **initial_controlp;
|
struct mbuf *control, **initial_controlp;
|
||||||
struct proc *p;
|
struct proc *p;
|
||||||
@ -2527,6 +2567,7 @@ unp_internalize(struct mbuf **controlp, struct thread *td)
|
|||||||
int i, j, error, *fdp, oldfds;
|
int i, j, error, *fdp, oldfds;
|
||||||
u_int newlen;
|
u_int newlen;
|
||||||
|
|
||||||
|
MPASS((*controlp)->m_next == NULL); /* COMPAT_OLDSOCK may violate */
|
||||||
UNP_LINK_UNLOCK_ASSERT();
|
UNP_LINK_UNLOCK_ASSERT();
|
||||||
|
|
||||||
p = td->td_proc;
|
p = td->td_proc;
|
||||||
@ -2672,6 +2713,13 @@ unp_internalize(struct mbuf **controlp, struct thread *td)
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (space != NULL) {
|
||||||
|
*space += (*controlp)->m_len;
|
||||||
|
*mbcnt += MSIZE;
|
||||||
|
if ((*controlp)->m_flags & M_EXT)
|
||||||
|
*mbcnt += (*controlp)->m_ext.ext_size;
|
||||||
|
*clast = *controlp;
|
||||||
|
}
|
||||||
controlp = &(*controlp)->m_next;
|
controlp = &(*controlp)->m_next;
|
||||||
}
|
}
|
||||||
if (clen > 0)
|
if (clen > 0)
|
||||||
@ -2685,7 +2733,8 @@ unp_internalize(struct mbuf **controlp, struct thread *td)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static struct mbuf *
|
static struct mbuf *
|
||||||
unp_addsockcred(struct thread *td, struct mbuf *control, int mode)
|
unp_addsockcred(struct thread *td, struct mbuf *control, int mode,
|
||||||
|
struct mbuf **clast, u_int *space, u_int *mbcnt)
|
||||||
{
|
{
|
||||||
struct mbuf *m, *n, *n_prev;
|
struct mbuf *m, *n, *n_prev;
|
||||||
const struct cmsghdr *cm;
|
const struct cmsghdr *cm;
|
||||||
@ -2704,6 +2753,7 @@ unp_addsockcred(struct thread *td, struct mbuf *control, int mode)
|
|||||||
m = sbcreatecontrol(NULL, ctrlsz, cmsgtype, SOL_SOCKET, M_NOWAIT);
|
m = sbcreatecontrol(NULL, ctrlsz, cmsgtype, SOL_SOCKET, M_NOWAIT);
|
||||||
if (m == NULL)
|
if (m == NULL)
|
||||||
return (control);
|
return (control);
|
||||||
|
MPASS((m->m_flags & M_EXT) == 0 && m->m_next == NULL);
|
||||||
|
|
||||||
if (mode & UNP_WANTCRED_ALWAYS) {
|
if (mode & UNP_WANTCRED_ALWAYS) {
|
||||||
struct sockcred2 *sc;
|
struct sockcred2 *sc;
|
||||||
@ -2745,6 +2795,25 @@ unp_addsockcred(struct thread *td, struct mbuf *control, int mode)
|
|||||||
control = n->m_next;
|
control = n->m_next;
|
||||||
else
|
else
|
||||||
n_prev->m_next = n->m_next;
|
n_prev->m_next = n->m_next;
|
||||||
|
if (space != NULL) {
|
||||||
|
MPASS(*space >= n->m_len);
|
||||||
|
*space -= n->m_len;
|
||||||
|
MPASS(*mbcnt >= MSIZE);
|
||||||
|
*mbcnt -= MSIZE;
|
||||||
|
if (n->m_flags & M_EXT) {
|
||||||
|
MPASS(*mbcnt >=
|
||||||
|
n->m_ext.ext_size);
|
||||||
|
*mbcnt -= n->m_ext.ext_size;
|
||||||
|
}
|
||||||
|
MPASS(clast);
|
||||||
|
if (*clast == n) {
|
||||||
|
MPASS(n->m_next == NULL);
|
||||||
|
if (n_prev == NULL)
|
||||||
|
*clast = m;
|
||||||
|
else
|
||||||
|
*clast = n_prev;
|
||||||
|
}
|
||||||
|
}
|
||||||
n = m_free(n);
|
n = m_free(n);
|
||||||
} else {
|
} else {
|
||||||
n_prev = n;
|
n_prev = n;
|
||||||
@ -2754,6 +2823,12 @@ unp_addsockcred(struct thread *td, struct mbuf *control, int mode)
|
|||||||
|
|
||||||
/* Prepend it to the head. */
|
/* Prepend it to the head. */
|
||||||
m->m_next = control;
|
m->m_next = control;
|
||||||
|
if (space != NULL) {
|
||||||
|
*space += m->m_len;
|
||||||
|
*mbcnt += MSIZE;
|
||||||
|
if (control == NULL)
|
||||||
|
*clast = m;
|
||||||
|
}
|
||||||
return (m);
|
return (m);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user