Merge next step in socket buffer locking:

- sowakeup() now asserts the socket buffer lock on entry.  Move
  the call to KNOTE higher in sowakeup() so that it is made with
  the socket buffer lock held for consistency with other calls.
  Release the socket buffer lock prior to calling into pgsigio(),
  so_upcall(), or aio_swake().  Locking for this event management
  will need revisiting in the future, but this model avoids lock
  order reversals when upcalls into other subsystems result in
  socket/socket buffer operations.  Assert that the socket buffer
  lock is not held at the end of the function.

- Wrapper macros for sowakeup(), sorwakeup() and sowwakeup(), now
  have _locked versions which assert the socket buffer lock on
  entry.  If a wakeup is required by sb_notify(), invoke
  sowakeup(); otherwise, unconditionally release the socket buffer
  lock.  This results in the socket buffer lock being released
  whether a wakeup is required or not.

- Break out socantsendmore() into socantsendmore_locked() that
  asserts the socket buffer lock.  socantsendmore()
  unconditionally locks the socket buffer before calling
  socantsendmore_locked().  Note that both functions return with
  the socket buffer unlocked as socantsendmore_locked() calls
  sowwakeup_locked() which has the same properties.  Assert that
  the socket buffer is unlocked on return.

- Break out socantrcvmore() into socantrcvmore_locked() that
  asserts the socket buffer lock.  socantrcvmore() unconditionally
  locks the socket buffer before calling socantrcvmore_locked().
  Note that both functions return with the socket buffer unlocked
  as socantrcvmore_locked() calls sorwakeup_locked() which has
  similar properties.  Assert that the socket buffer is unlocked
  on return.

- Break out sbrelease() into a sbrelease_locked() that asserts the
  socket buffer lock.  sbrelease() unconditionally locks the
  socket buffer before calling sbrelease_locked().
  sbrelease_locked() now invokes sbflush_locked() instead of
  sbflush().

- Assert the socket buffer lock in socket buffer sanity check
  functions sblastrecordchk(), sblastmbufchk().

- Assert the socket buffer lock in SBLINKRECORD().

- Break out various sbappend() functions into sbappend_locked()
  (and variations on that name) that assert the socket buffer
  lock.  The !_locked() variations unconditionally lock the socket
  buffer before calling their _locked counterparts.  Internally,
  make sure to call _locked() support routines, etc, if already
  holding the socket buffer lock.

- Break out sbinsertoob() into sbinsertoob_locked() that asserts
  the socket buffer lock.  sbinsertoob() unconditionally locks the
  socket buffer before calling sbinsertoob_locked().

- Break out sbflush() into sbflush_locked() that asserts the
  socket buffer lock.  sbflush() unconditionally locks the socket
  buffer before calling sbflush_locked().  Update panic strings
  for new function names.

- Break out sbdrop() into sbdrop_locked() that asserts the socket
  buffer lock.  sbdrop() unconditionally locks the socket buffer
  before calling sbdrop_locked().

- Break out sbdroprecord() into sbdroprecord_locked() that asserts
  the socket buffer lock.  sbdroprecord() unconditionally locks
  the socket buffer before calling sbdroprecord_locked().

- sofree() now calls socantsendmore_locked() and re-acquires the
  socket buffer lock on return.  It also now calls
  sbrelease_locked().

- sorflush() now calls socantrcvmore_locked() and re-acquires the
  socket buffer lock on return.  Clean up/mess up other behavior
  in sorflush() relating to the temporary stack copy of the socket
  buffer used with dom_dispose by more properly initializing the
  temporary copy, and selectively bzeroing/copying more carefully
  to prevent WITNESS from getting confused by improperly
  initialized mutexes.  Annotate why that's necessary, or at
  least, needed.

- soisconnected() now calls sbdrop_locked() before unlocking the
  socket buffer to avoid locking overhead.

Some parts of this change were:

Submitted by:	sam
Sponsored by:	FreeBSD Foundation
Obtained from:	BSD/OS
This commit is contained in:
Robert Watson 2004-06-21 00:20:43 +00:00
parent 15b87b531e
commit a34b704666
6 changed files with 560 additions and 71 deletions

View File

@ -197,9 +197,9 @@ soisdisconnected(so)
SOCKBUF_UNLOCK(&so->so_rcv);
SOCKBUF_LOCK(&so->so_snd);
so->so_snd.sb_state |= SBS_CANTSENDMORE;
sbdrop_locked(&so->so_snd, so->so_snd.sb_cc);
SOCKBUF_UNLOCK(&so->so_snd);
wakeup(&so->so_timeo);
sbdrop(&so->so_snd, so->so_snd.sb_cc);
sowwakeup(so);
sorwakeup(so);
}
@ -296,14 +296,38 @@ sonewconn(head, connstatus)
* protocol when it detects that the peer will send no more data.
* Data queued for reading in the socket may yet be read.
*/
void
socantsendmore_locked(so)
struct socket *so;
{
SOCKBUF_LOCK_ASSERT(&so->so_snd);
so->so_snd.sb_state |= SBS_CANTSENDMORE;
sowwakeup_locked(so);
mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
}
void
socantsendmore(so)
struct socket *so;
{
so->so_snd.sb_state |= SBS_CANTSENDMORE;
sowwakeup(so);
SOCKBUF_LOCK(&so->so_snd);
socantsendmore_locked(so);
mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
}
void
socantrcvmore_locked(so)
struct socket *so;
{
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
so->so_rcv.sb_state |= SBS_CANTRCVMORE;
sorwakeup_locked(so);
mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}
void
@ -311,8 +335,9 @@ socantrcvmore(so)
struct socket *so;
{
so->so_rcv.sb_state |= SBS_CANTRCVMORE;
sorwakeup(so);
SOCKBUF_LOCK(&so->so_rcv);
socantrcvmore_locked(so);
mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}
/*
@ -356,9 +381,16 @@ sb_lock(sb)
}
/*
* Wakeup processes waiting on a socket buffer.
* Do asynchronous notification via SIGIO
* if the socket has the SS_ASYNC flag set.
* Wakeup processes waiting on a socket buffer. Do asynchronous
* notification via SIGIO if the socket has the SS_ASYNC flag set.
*
* Called with the socket buffer lock held; will release the lock by the end
* of the function. This allows the caller to acquire the socket buffer lock
* while testing for the need for various sorts of wakeup and hold it through
* to the point where it's no longer required. We currently hold the lock
* through calls out to other subsystems (with the exception of kqueue), and
* then release it to avoid lock order issues. It's not clear that's
* correct.
*/
void
sowakeup(so, sb)
@ -366,19 +398,23 @@ sowakeup(so, sb)
register struct sockbuf *sb;
{
SOCKBUF_LOCK_ASSERT(sb);
selwakeuppri(&sb->sb_sel, PSOCK);
sb->sb_flags &= ~SB_SEL;
if (sb->sb_flags & SB_WAIT) {
sb->sb_flags &= ~SB_WAIT;
wakeup(&sb->sb_cc);
}
KNOTE(&sb->sb_sel.si_note, 0);
SOCKBUF_UNLOCK(sb);
if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
pgsigio(&so->so_sigio, SIGIO, 0);
if (sb->sb_flags & SB_UPCALL)
(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
if (sb->sb_flags & SB_AIO)
aio_swake(so, sb);
KNOTE(&sb->sb_sel.si_note, 0);
mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED);
}
/*
@ -500,17 +536,29 @@ sbreserve(sb, cc, so, td)
* Free mbufs held by a socket, and reserved mbuf space.
*/
void
sbrelease(sb, so)
sbrelease_locked(sb, so)
struct sockbuf *sb;
struct socket *so;
{
sbflush(sb);
SOCKBUF_LOCK_ASSERT(sb);
sbflush_locked(sb);
(void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
RLIM_INFINITY);
sb->sb_mbmax = 0;
}
void
sbrelease(sb, so)
struct sockbuf *sb;
struct socket *so;
{
SOCKBUF_LOCK(sb);
sbrelease_locked(sb, so);
SOCKBUF_UNLOCK(sb);
}
/*
* Routines to add and remove
* data from an mbuf queue.
@ -542,6 +590,8 @@ sblastrecordchk(struct sockbuf *sb, const char *file, int line)
{
struct mbuf *m = sb->sb_mb;
SOCKBUF_LOCK_ASSERT(sb);
while (m && m->m_nextpkt)
m = m->m_nextpkt;
@ -561,6 +611,8 @@ sblastmbufchk(struct sockbuf *sb, const char *file, int line)
struct mbuf *m = sb->sb_mb;
struct mbuf *n;
SOCKBUF_LOCK_ASSERT(sb);
while (m && m->m_nextpkt)
m = m->m_nextpkt;
@ -583,6 +635,7 @@ sblastmbufchk(struct sockbuf *sb, const char *file, int line)
#endif /* SOCKBUF_DEBUG */
#define SBLINKRECORD(sb, m0) do { \
SOCKBUF_LOCK_ASSERT(sb); \
if ((sb)->sb_lastrecord != NULL) \
(sb)->sb_lastrecord->m_nextpkt = (m0); \
else \
@ -597,14 +650,17 @@ sblastmbufchk(struct sockbuf *sb, const char *file, int line)
* discarded and mbufs are compacted where possible.
*/
void
sbappend(sb, m)
sbappend_locked(sb, m)
struct sockbuf *sb;
struct mbuf *m;
{
register struct mbuf *n;
SOCKBUF_LOCK_ASSERT(sb);
if (m == 0)
return;
SBLASTRECORDCHK(sb);
n = sb->sb_mb;
if (n) {
@ -612,7 +668,7 @@ sbappend(sb, m)
n = n->m_nextpkt;
do {
if (n->m_flags & M_EOR) {
sbappendrecord(sb, m); /* XXXXXX!!!! */
sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
return;
}
} while (n->m_next && (n = n->m_next));
@ -625,7 +681,7 @@ sbappend(sb, m)
if ((n = sb->sb_lastrecord) != NULL) {
do {
if (n->m_flags & M_EOR) {
sbappendrecord(sb, m); /* XXXXXX!!!! */
sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
return;
}
} while (n->m_next && (n = n->m_next));
@ -641,14 +697,32 @@ sbappend(sb, m)
SBLASTRECORDCHK(sb);
}
/*
* Append mbuf chain m to the last record in the
* socket buffer sb. The additional space associated
* the mbuf chain is recorded in sb. Empty mbufs are
* discarded and mbufs are compacted where possible.
*/
void
sbappend(sb, m)
struct sockbuf *sb;
struct mbuf *m;
{
SOCKBUF_LOCK(sb);
sbappend_locked(sb, m);
SOCKBUF_UNLOCK(sb);
}
/*
* This version of sbappend() should only be used when the caller
* absolutely knows that there will never be more than one record
* in the socket buffer, that is, a stream protocol (such as TCP).
*/
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
sbappendstream_locked(struct sockbuf *sb, struct mbuf *m)
{
SOCKBUF_LOCK_ASSERT(sb);
KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));
@ -661,6 +735,20 @@ sbappendstream(struct sockbuf *sb, struct mbuf *m)
SBLASTRECORDCHK(sb);
}
/*
* This version of sbappend() should only be used when the caller
* absolutely knows that there will never be more than one record
* in the socket buffer, that is, a stream protocol (such as TCP).
*/
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{
SOCKBUF_LOCK(sb);
sbappendstream_locked(sb, m);
SOCKBUF_UNLOCK(sb);
}
#ifdef SOCKBUF_DEBUG
void
sbcheck(sb)
@ -670,6 +758,8 @@ sbcheck(sb)
struct mbuf *n = 0;
u_long len = 0, mbcnt = 0;
SOCKBUF_LOCK_ASSERT(sb);
for (m = sb->sb_mb; m; m = n) {
n = m->m_nextpkt;
for (; m; m = m->m_next) {
@ -692,12 +782,14 @@ sbcheck(sb)
* begins a new record.
*/
void
sbappendrecord(sb, m0)
sbappendrecord_locked(sb, m0)
register struct sockbuf *sb;
register struct mbuf *m0;
{
register struct mbuf *m;
SOCKBUF_LOCK_ASSERT(sb);
if (m0 == 0)
return;
m = sb->sb_mb;
@ -724,19 +816,36 @@ sbappendrecord(sb, m0)
sbcompress(sb, m, m0);
}
/*
* As above, except the mbuf chain
* begins a new record.
*/
void
sbappendrecord(sb, m0)
register struct sockbuf *sb;
register struct mbuf *m0;
{
SOCKBUF_LOCK(sb);
sbappendrecord_locked(sb, m0);
SOCKBUF_UNLOCK(sb);
}
/*
* As above except that OOB data
* is inserted at the beginning of the sockbuf,
* but after any other OOB data.
*/
void
sbinsertoob(sb, m0)
sbinsertoob_locked(sb, m0)
register struct sockbuf *sb;
register struct mbuf *m0;
{
register struct mbuf *m;
register struct mbuf **mp;
SOCKBUF_LOCK_ASSERT(sb);
if (m0 == 0)
return;
for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) {
@ -770,6 +879,22 @@ sbinsertoob(sb, m0)
sbcompress(sb, m, m0);
}
/*
* As above except that OOB data
* is inserted at the beginning of the sockbuf,
* but after any other OOB data.
*/
void
sbinsertoob(sb, m0)
register struct sockbuf *sb;
register struct mbuf *m0;
{
SOCKBUF_LOCK(sb);
sbinsertoob_locked(sb, m0);
SOCKBUF_UNLOCK(sb);
}
/*
* Append address and data, and optionally, control (ancillary) data
* to the receive queue of a socket. If present,
@ -777,7 +902,7 @@ sbinsertoob(sb, m0)
* Returns 0 if no space in sockbuf or insufficient mbufs.
*/
int
sbappendaddr(sb, asa, m0, control)
sbappendaddr_locked(sb, asa, m0, control)
struct sockbuf *sb;
const struct sockaddr *asa;
struct mbuf *m0, *control;
@ -785,11 +910,14 @@ sbappendaddr(sb, asa, m0, control)
struct mbuf *m, *n, *nlast;
int space = asa->sa_len;
SOCKBUF_LOCK_ASSERT(sb);
if (m0 && (m0->m_flags & M_PKTHDR) == 0)
panic("sbappendaddr");
panic("sbappendaddr_locked");
if (m0)
space += m0->m_pkthdr.len;
space += m_length(control, &n);
if (space > sbspace(sb))
return (0);
#if MSIZE <= 256
@ -819,17 +947,40 @@ sbappendaddr(sb, asa, m0, control)
return (1);
}
/*
* Append address and data, and optionally, control (ancillary) data
* to the receive queue of a socket. If present,
* m0 must include a packet header with total length.
* Returns 0 if no space in sockbuf or insufficient mbufs.
*/
int
sbappendcontrol(sb, m0, control)
sbappendaddr(sb, asa, m0, control)
struct sockbuf *sb;
const struct sockaddr *asa;
struct mbuf *m0, *control;
{
int retval;
SOCKBUF_LOCK(sb);
retval = sbappendaddr_locked(sb, asa, m0, control);
SOCKBUF_UNLOCK(sb);
return (retval);
}
int
sbappendcontrol_locked(sb, m0, control)
struct sockbuf *sb;
struct mbuf *control, *m0;
{
struct mbuf *m, *n, *mlast;
int space;
SOCKBUF_LOCK_ASSERT(sb);
if (control == 0)
panic("sbappendcontrol");
panic("sbappendcontrol_locked");
space = m_length(control, &n) + m_length(m0, NULL);
if (space > sbspace(sb))
return (0);
n->m_next = m0; /* concatenate data to control */
@ -849,6 +1000,19 @@ sbappendcontrol(sb, m0, control)
return (1);
}
int
sbappendcontrol(sb, m0, control)
struct sockbuf *sb;
struct mbuf *control, *m0;
{
int retval;
SOCKBUF_LOCK(sb);
retval = sbappendcontrol_locked(sb, m0, control);
SOCKBUF_UNLOCK(sb);
return (retval);
}
/*
* Compress mbuf chain m into the socket
* buffer sb following mbuf n. If n
@ -862,6 +1026,8 @@ sbcompress(sb, m, n)
register int eor = 0;
register struct mbuf *o;
SOCKBUF_LOCK_ASSERT(sb);
while (m) {
eor |= m->m_flags & M_EOR;
if (m->m_len == 0 &&
@ -914,12 +1080,14 @@ sbcompress(sb, m, n)
* Check that all resources are reclaimed.
*/
void
sbflush(sb)
sbflush_locked(sb)
register struct sockbuf *sb;
{
SOCKBUF_LOCK_ASSERT(sb);
if (sb->sb_flags & SB_LOCK)
panic("sbflush: locked");
panic("sbflush_locked: locked");
while (sb->sb_mbcnt) {
/*
* Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
@ -927,23 +1095,35 @@ sbflush(sb)
*/
if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
break;
sbdrop(sb, (int)sb->sb_cc);
sbdrop_locked(sb, (int)sb->sb_cc);
}
if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt)
panic("sbflush: cc %u || mb %p || mbcnt %u", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt);
panic("sbflush_locked: cc %u || mb %p || mbcnt %u", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt);
}
void
sbflush(sb)
register struct sockbuf *sb;
{
SOCKBUF_LOCK(sb);
sbflush_locked(sb);
SOCKBUF_UNLOCK(sb);
}
/*
* Drop data from (the front of) a sockbuf.
*/
void
sbdrop(sb, len)
sbdrop_locked(sb, len)
register struct sockbuf *sb;
register int len;
{
register struct mbuf *m;
struct mbuf *next;
SOCKBUF_LOCK_ASSERT(sb);
next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
while (len > 0) {
if (m == 0) {
@ -989,16 +1169,32 @@ sbdrop(sb, len)
}
}
/*
* Drop data from (the front of) a sockbuf.
*/
void
sbdrop(sb, len)
register struct sockbuf *sb;
register int len;
{
SOCKBUF_LOCK(sb);
sbdrop_locked(sb, len);
SOCKBUF_UNLOCK(sb);
}
/*
* Drop a record off the front of a sockbuf
* and move the next record to the front.
*/
void
sbdroprecord(sb)
sbdroprecord_locked(sb)
register struct sockbuf *sb;
{
register struct mbuf *m;
SOCKBUF_LOCK_ASSERT(sb);
m = sb->sb_mb;
if (m) {
sb->sb_mb = m->m_nextpkt;
@ -1010,6 +1206,20 @@ sbdroprecord(sb)
SB_EMPTY_FIXUP(sb);
}
/*
* Drop a record off the front of a sockbuf
* and move the next record to the front.
*/
void
sbdroprecord(sb)
register struct sockbuf *sb;
{
SOCKBUF_LOCK(sb);
sbdroprecord_locked(sb);
SOCKBUF_UNLOCK(sb);
}
/*
* Create a "control" mbuf containing the specified data
* with the specified type for presentation on a socket buffer.

View File

@ -348,9 +348,15 @@ sofree(so)
SOCKBUF_LOCK(&so->so_snd);
so->so_snd.sb_flags |= SB_NOINTR;
(void)sblock(&so->so_snd, M_WAITOK);
socantsendmore(so);
/*
* socantsendmore_locked() drops the socket buffer mutex so that it
* can safely perform wakeups. Re-acquire the mutex before
* continuing.
*/
socantsendmore_locked(so);
SOCKBUF_LOCK(&so->so_snd);
sbunlock(&so->so_snd);
sbrelease(&so->so_snd, so);
sbrelease_locked(&so->so_snd, so);
SOCKBUF_UNLOCK(&so->so_snd);
sorflush(so);
sodealloc(so);
@ -1202,7 +1208,7 @@ soreceive(so, psa, uio, mp0, controlp, flagsp)
flags |= MSG_TRUNC;
if ((flags & MSG_PEEK) == 0) {
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
(void) sbdroprecord(&so->so_rcv);
(void) sbdroprecord_locked(&so->so_rcv);
}
}
if ((flags & MSG_PEEK) == 0) {
@ -1271,23 +1277,41 @@ sorflush(so)
struct protosw *pr = so->so_proto;
struct sockbuf asb;
/*
* XXXRW: This is quite ugly. The existing code made a copy of the
* socket buffer, then zero'd the original to clear the buffer
* fields. However, with mutexes in the socket buffer, this causes
* problems. We only clear the zeroable bits of the original;
* however, we have to initialize and destroy the mutex in the copy
* so that dom_dispose() and sbrelease() can lock t as needed.
*/
SOCKBUF_LOCK(sb);
sb->sb_flags |= SB_NOINTR;
(void) sblock(sb, M_WAITOK);
socantrcvmore(so);
sbunlock(sb);
asb = *sb;
/*
* Invalidate/clear most of the sockbuf structure, but keep
* its selinfo structure valid.
* socantrcvmore_locked() drops the socket buffer mutex so that it
* can safely perform wakeups. Re-acquire the mutex before
* continuing.
*/
socantrcvmore_locked(so);
SOCKBUF_LOCK(sb);
sbunlock(sb);
/*
* Invalidate/clear most of the sockbuf structure, but leave
* selinfo and mutex data unchanged.
*/
bzero(&asb, offsetof(struct sockbuf, sb_startzero));
bcopy(&sb->sb_startzero, &asb.sb_startzero,
sizeof(*sb) - offsetof(struct sockbuf, sb_startzero));
bzero(&sb->sb_startzero,
sizeof(*sb) - offsetof(struct sockbuf, sb_startzero));
SOCKBUF_UNLOCK(sb);
SOCKBUF_LOCK_INIT(&asb, "so_rcv");
if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
(*pr->pr_domain->dom_dispose)(asb.sb_mb);
sbrelease(&asb, so);
SOCKBUF_LOCK_DESTROY(&asb);
}
#ifdef INET

View File

@ -197,9 +197,9 @@ soisdisconnected(so)
SOCKBUF_UNLOCK(&so->so_rcv);
SOCKBUF_LOCK(&so->so_snd);
so->so_snd.sb_state |= SBS_CANTSENDMORE;
sbdrop_locked(&so->so_snd, so->so_snd.sb_cc);
SOCKBUF_UNLOCK(&so->so_snd);
wakeup(&so->so_timeo);
sbdrop(&so->so_snd, so->so_snd.sb_cc);
sowwakeup(so);
sorwakeup(so);
}
@ -296,14 +296,38 @@ sonewconn(head, connstatus)
* protocol when it detects that the peer will send no more data.
* Data queued for reading in the socket may yet be read.
*/
void
socantsendmore_locked(so)
struct socket *so;
{
SOCKBUF_LOCK_ASSERT(&so->so_snd);
so->so_snd.sb_state |= SBS_CANTSENDMORE;
sowwakeup_locked(so);
mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
}
void
socantsendmore(so)
struct socket *so;
{
so->so_snd.sb_state |= SBS_CANTSENDMORE;
sowwakeup(so);
SOCKBUF_LOCK(&so->so_snd);
socantsendmore_locked(so);
mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
}
void
socantrcvmore_locked(so)
struct socket *so;
{
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
so->so_rcv.sb_state |= SBS_CANTRCVMORE;
sorwakeup_locked(so);
mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}
void
@ -311,8 +335,9 @@ socantrcvmore(so)
struct socket *so;
{
so->so_rcv.sb_state |= SBS_CANTRCVMORE;
sorwakeup(so);
SOCKBUF_LOCK(&so->so_rcv);
socantrcvmore_locked(so);
mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}
/*
@ -356,9 +381,16 @@ sb_lock(sb)
}
/*
* Wakeup processes waiting on a socket buffer.
* Do asynchronous notification via SIGIO
* if the socket has the SS_ASYNC flag set.
* Wakeup processes waiting on a socket buffer. Do asynchronous
* notification via SIGIO if the socket has the SS_ASYNC flag set.
*
* Called with the socket buffer lock held; will release the lock by the end
* of the function. This allows the caller to acquire the socket buffer lock
* while testing for the need for various sorts of wakeup and hold it through
* to the point where it's no longer required. We currently hold the lock
* through calls out to other subsystems (with the exception of kqueue), and
* then release it to avoid lock order issues. It's not clear that's
* correct.
*/
void
sowakeup(so, sb)
@ -366,19 +398,23 @@ sowakeup(so, sb)
register struct sockbuf *sb;
{
SOCKBUF_LOCK_ASSERT(sb);
selwakeuppri(&sb->sb_sel, PSOCK);
sb->sb_flags &= ~SB_SEL;
if (sb->sb_flags & SB_WAIT) {
sb->sb_flags &= ~SB_WAIT;
wakeup(&sb->sb_cc);
}
KNOTE(&sb->sb_sel.si_note, 0);
SOCKBUF_UNLOCK(sb);
if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
pgsigio(&so->so_sigio, SIGIO, 0);
if (sb->sb_flags & SB_UPCALL)
(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
if (sb->sb_flags & SB_AIO)
aio_swake(so, sb);
KNOTE(&sb->sb_sel.si_note, 0);
mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED);
}
/*
@ -500,17 +536,29 @@ sbreserve(sb, cc, so, td)
* Free mbufs held by a socket, and reserved mbuf space.
*/
void
sbrelease(sb, so)
sbrelease_locked(sb, so)
struct sockbuf *sb;
struct socket *so;
{
sbflush(sb);
SOCKBUF_LOCK_ASSERT(sb);
sbflush_locked(sb);
(void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
RLIM_INFINITY);
sb->sb_mbmax = 0;
}
void
sbrelease(sb, so)
struct sockbuf *sb;
struct socket *so;
{
SOCKBUF_LOCK(sb);
sbrelease_locked(sb, so);
SOCKBUF_UNLOCK(sb);
}
/*
* Routines to add and remove
* data from an mbuf queue.
@ -542,6 +590,8 @@ sblastrecordchk(struct sockbuf *sb, const char *file, int line)
{
struct mbuf *m = sb->sb_mb;
SOCKBUF_LOCK_ASSERT(sb);
while (m && m->m_nextpkt)
m = m->m_nextpkt;
@ -561,6 +611,8 @@ sblastmbufchk(struct sockbuf *sb, const char *file, int line)
struct mbuf *m = sb->sb_mb;
struct mbuf *n;
SOCKBUF_LOCK_ASSERT(sb);
while (m && m->m_nextpkt)
m = m->m_nextpkt;
@ -583,6 +635,7 @@ sblastmbufchk(struct sockbuf *sb, const char *file, int line)
#endif /* SOCKBUF_DEBUG */
#define SBLINKRECORD(sb, m0) do { \
SOCKBUF_LOCK_ASSERT(sb); \
if ((sb)->sb_lastrecord != NULL) \
(sb)->sb_lastrecord->m_nextpkt = (m0); \
else \
@ -597,14 +650,17 @@ sblastmbufchk(struct sockbuf *sb, const char *file, int line)
* discarded and mbufs are compacted where possible.
*/
void
sbappend(sb, m)
sbappend_locked(sb, m)
struct sockbuf *sb;
struct mbuf *m;
{
register struct mbuf *n;
SOCKBUF_LOCK_ASSERT(sb);
if (m == 0)
return;
SBLASTRECORDCHK(sb);
n = sb->sb_mb;
if (n) {
@ -612,7 +668,7 @@ sbappend(sb, m)
n = n->m_nextpkt;
do {
if (n->m_flags & M_EOR) {
sbappendrecord(sb, m); /* XXXXXX!!!! */
sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
return;
}
} while (n->m_next && (n = n->m_next));
@ -625,7 +681,7 @@ sbappend(sb, m)
if ((n = sb->sb_lastrecord) != NULL) {
do {
if (n->m_flags & M_EOR) {
sbappendrecord(sb, m); /* XXXXXX!!!! */
sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
return;
}
} while (n->m_next && (n = n->m_next));
@ -641,14 +697,32 @@ sbappend(sb, m)
SBLASTRECORDCHK(sb);
}
/*
* Append mbuf chain m to the last record in the
* socket buffer sb. The additional space associated
* the mbuf chain is recorded in sb. Empty mbufs are
* discarded and mbufs are compacted where possible.
*/
void
sbappend(sb, m)
struct sockbuf *sb;
struct mbuf *m;
{
SOCKBUF_LOCK(sb);
sbappend_locked(sb, m);
SOCKBUF_UNLOCK(sb);
}
/*
* This version of sbappend() should only be used when the caller
* absolutely knows that there will never be more than one record
* in the socket buffer, that is, a stream protocol (such as TCP).
*/
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
sbappendstream_locked(struct sockbuf *sb, struct mbuf *m)
{
SOCKBUF_LOCK_ASSERT(sb);
KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));
@ -661,6 +735,20 @@ sbappendstream(struct sockbuf *sb, struct mbuf *m)
SBLASTRECORDCHK(sb);
}
/*
* This version of sbappend() should only be used when the caller
* absolutely knows that there will never be more than one record
* in the socket buffer, that is, a stream protocol (such as TCP).
*/
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{
SOCKBUF_LOCK(sb);
sbappendstream_locked(sb, m);
SOCKBUF_UNLOCK(sb);
}
#ifdef SOCKBUF_DEBUG
void
sbcheck(sb)
@ -670,6 +758,8 @@ sbcheck(sb)
struct mbuf *n = 0;
u_long len = 0, mbcnt = 0;
SOCKBUF_LOCK_ASSERT(sb);
for (m = sb->sb_mb; m; m = n) {
n = m->m_nextpkt;
for (; m; m = m->m_next) {
@ -692,12 +782,14 @@ sbcheck(sb)
* begins a new record.
*/
void
sbappendrecord(sb, m0)
sbappendrecord_locked(sb, m0)
register struct sockbuf *sb;
register struct mbuf *m0;
{
register struct mbuf *m;
SOCKBUF_LOCK_ASSERT(sb);
if (m0 == 0)
return;
m = sb->sb_mb;
@ -724,19 +816,36 @@ sbappendrecord(sb, m0)
sbcompress(sb, m, m0);
}
/*
* As above, except the mbuf chain
* begins a new record.
*/
void
sbappendrecord(sb, m0)
register struct sockbuf *sb;
register struct mbuf *m0;
{
SOCKBUF_LOCK(sb);
sbappendrecord_locked(sb, m0);
SOCKBUF_UNLOCK(sb);
}
/*
* As above except that OOB data
* is inserted at the beginning of the sockbuf,
* but after any other OOB data.
*/
void
sbinsertoob(sb, m0)
sbinsertoob_locked(sb, m0)
register struct sockbuf *sb;
register struct mbuf *m0;
{
register struct mbuf *m;
register struct mbuf **mp;
SOCKBUF_LOCK_ASSERT(sb);
if (m0 == 0)
return;
for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) {
@ -770,6 +879,22 @@ sbinsertoob(sb, m0)
sbcompress(sb, m, m0);
}
/*
* As above except that OOB data
* is inserted at the beginning of the sockbuf,
* but after any other OOB data.
*/
void
sbinsertoob(sb, m0)
register struct sockbuf *sb;
register struct mbuf *m0;
{
SOCKBUF_LOCK(sb);
sbinsertoob_locked(sb, m0);
SOCKBUF_UNLOCK(sb);
}
/*
* Append address and data, and optionally, control (ancillary) data
* to the receive queue of a socket. If present,
@ -777,7 +902,7 @@ sbinsertoob(sb, m0)
* Returns 0 if no space in sockbuf or insufficient mbufs.
*/
int
sbappendaddr(sb, asa, m0, control)
sbappendaddr_locked(sb, asa, m0, control)
struct sockbuf *sb;
const struct sockaddr *asa;
struct mbuf *m0, *control;
@ -785,11 +910,14 @@ sbappendaddr(sb, asa, m0, control)
struct mbuf *m, *n, *nlast;
int space = asa->sa_len;
SOCKBUF_LOCK_ASSERT(sb);
if (m0 && (m0->m_flags & M_PKTHDR) == 0)
panic("sbappendaddr");
panic("sbappendaddr_locked");
if (m0)
space += m0->m_pkthdr.len;
space += m_length(control, &n);
if (space > sbspace(sb))
return (0);
#if MSIZE <= 256
@ -819,17 +947,40 @@ sbappendaddr(sb, asa, m0, control)
return (1);
}
/*
* Append address and data, and optionally, control (ancillary) data
* to the receive queue of a socket. If present,
* m0 must include a packet header with total length.
* Returns 0 if no space in sockbuf or insufficient mbufs.
*/
int
sbappendcontrol(sb, m0, control)
sbappendaddr(sb, asa, m0, control)
struct sockbuf *sb;
const struct sockaddr *asa;
struct mbuf *m0, *control;
{
int retval;
SOCKBUF_LOCK(sb);
retval = sbappendaddr_locked(sb, asa, m0, control);
SOCKBUF_UNLOCK(sb);
return (retval);
}
int
sbappendcontrol_locked(sb, m0, control)
struct sockbuf *sb;
struct mbuf *control, *m0;
{
struct mbuf *m, *n, *mlast;
int space;
SOCKBUF_LOCK_ASSERT(sb);
if (control == 0)
panic("sbappendcontrol");
panic("sbappendcontrol_locked");
space = m_length(control, &n) + m_length(m0, NULL);
if (space > sbspace(sb))
return (0);
n->m_next = m0; /* concatenate data to control */
@ -849,6 +1000,19 @@ sbappendcontrol(sb, m0, control)
return (1);
}
int
sbappendcontrol(sb, m0, control)
struct sockbuf *sb;
struct mbuf *control, *m0;
{
int retval;
SOCKBUF_LOCK(sb);
retval = sbappendcontrol_locked(sb, m0, control);
SOCKBUF_UNLOCK(sb);
return (retval);
}
/*
* Compress mbuf chain m into the socket
* buffer sb following mbuf n. If n
@ -862,6 +1026,8 @@ sbcompress(sb, m, n)
register int eor = 0;
register struct mbuf *o;
SOCKBUF_LOCK_ASSERT(sb);
while (m) {
eor |= m->m_flags & M_EOR;
if (m->m_len == 0 &&
@ -914,12 +1080,14 @@ sbcompress(sb, m, n)
* Check that all resources are reclaimed.
*/
void
sbflush(sb)
sbflush_locked(sb)
register struct sockbuf *sb;
{
SOCKBUF_LOCK_ASSERT(sb);
if (sb->sb_flags & SB_LOCK)
panic("sbflush: locked");
panic("sbflush_locked: locked");
while (sb->sb_mbcnt) {
/*
* Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
@ -927,23 +1095,35 @@ sbflush(sb)
*/
if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
break;
sbdrop(sb, (int)sb->sb_cc);
sbdrop_locked(sb, (int)sb->sb_cc);
}
if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt)
panic("sbflush: cc %u || mb %p || mbcnt %u", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt);
panic("sbflush_locked: cc %u || mb %p || mbcnt %u", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt);
}
void
sbflush(sb)
register struct sockbuf *sb;
{
SOCKBUF_LOCK(sb);
sbflush_locked(sb);
SOCKBUF_UNLOCK(sb);
}
/*
* Drop data from (the front of) a sockbuf.
*/
void
sbdrop(sb, len)
sbdrop_locked(sb, len)
register struct sockbuf *sb;
register int len;
{
register struct mbuf *m;
struct mbuf *next;
SOCKBUF_LOCK_ASSERT(sb);
next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
while (len > 0) {
if (m == 0) {
@ -989,16 +1169,32 @@ sbdrop(sb, len)
}
}
/*
* Drop data from (the front of) a sockbuf.
*/
void
sbdrop(sb, len)
register struct sockbuf *sb;
register int len;
{
SOCKBUF_LOCK(sb);
sbdrop_locked(sb, len);
SOCKBUF_UNLOCK(sb);
}
/*
* Drop a record off the front of a sockbuf
* and move the next record to the front.
*/
void
sbdroprecord(sb)
sbdroprecord_locked(sb)
register struct sockbuf *sb;
{
register struct mbuf *m;
SOCKBUF_LOCK_ASSERT(sb);
m = sb->sb_mb;
if (m) {
sb->sb_mb = m->m_nextpkt;
@ -1010,6 +1206,20 @@ sbdroprecord(sb)
SB_EMPTY_FIXUP(sb);
}
/*
* Drop a record off the front of a sockbuf
* and move the next record to the front.
*/
void
sbdroprecord(sb)
register struct sockbuf *sb;
{
SOCKBUF_LOCK(sb);
sbdroprecord_locked(sb);
SOCKBUF_UNLOCK(sb);
}
/*
* Create a "control" mbuf containing the specified data
* with the specified type for presentation on a socket buffer.

View File

@ -353,11 +353,14 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
from = (struct sockaddr *)unp->unp_addr;
else
from = &sun_noname;
if (sbappendaddr(&so2->so_rcv, from, m, control)) {
SOCKBUF_LOCK(&so2->so_rcv);
if (sbappendaddr_locked(&so2->so_rcv, from, m, control)) {
SOCKBUF_UNLOCK(&so2->so_rcv);
sorwakeup(so2);
m = NULL;
control = NULL;
} else {
SOCKBUF_UNLOCK(&so2->so_rcv);
error = ENOBUFS;
}
if (nam != NULL)
@ -389,16 +392,17 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
if (unp->unp_conn == NULL)
panic("uipc_send connected but no connection?");
so2 = unp->unp_conn->unp_socket;
SOCKBUF_LOCK(&so2->so_rcv);
/*
* Send to paired receive port, and then reduce
* send buffer hiwater marks to maintain backpressure.
* Wake up readers.
*/
if (control != NULL) {
if (sbappendcontrol(&so2->so_rcv, m, control))
if (sbappendcontrol_locked(&so2->so_rcv, m, control))
control = NULL;
} else {
sbappend(&so2->so_rcv, m);
sbappend_locked(&so2->so_rcv, m);
}
so->so_snd.sb_mbmax -=
so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt;
@ -408,6 +412,7 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
(void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat,
newhiwat, RLIM_INFINITY);
unp->unp_conn->unp_cc = so2->so_rcv.sb_cc;
SOCKBUF_UNLOCK(&so2->so_rcv);
sorwakeup(so2);
m = NULL;
break;

View File

@ -41,8 +41,10 @@ __FBSDID("$FreeBSD$");
#include <sys/domain.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/protosw.h>
#include <sys/signalvar.h>
#include <sys/socket.h>

View File

@ -319,13 +319,14 @@ struct xsocket {
((sb)->sb_flags |= SB_LOCK), 0)
/* release lock on sockbuf sb */
#define sbunlock(sb) { \
#define sbunlock(sb) do { \
SOCKBUF_LOCK_ASSERT(sb); \
(sb)->sb_flags &= ~SB_LOCK; \
if ((sb)->sb_flags & SB_WANT) { \
(sb)->sb_flags &= ~SB_WANT; \
wakeup(&(sb)->sb_flags); \
} \
}
} while (0)
/*
* soref()/sorele() ref-count the socket structure. Note that you must
@ -355,14 +356,37 @@ struct xsocket {
SOCK_UNLOCK(so); \
} while(0)
#define sorwakeup(so) do { \
/*
* In sorwakeup() and sowwakeup(), acquire the socket buffer lock to
* avoid a non-atomic test-and-wakeup. However, sowakeup is
* responsible for releasing the lock if it is called. We unlock only
* if we don't call into sowakeup. If any code is introduced that
* directly invokes the underlying sowakeup() primitives, it must
* maintain the same semantics.
*/
#define sorwakeup_locked(so) do { \
SOCKBUF_LOCK_ASSERT(&(so)->so_rcv); \
if (sb_notify(&(so)->so_rcv)) \
sowakeup((so), &(so)->so_rcv); \
sowakeup((so), &(so)->so_rcv); \
else \
SOCKBUF_UNLOCK(&(so)->so_rcv); \
} while (0)
#define sorwakeup(so) do { \
SOCKBUF_LOCK(&(so)->so_rcv); \
sorwakeup_locked(so); \
} while (0)
#define sowwakeup_locked(so) do { \
if (sb_notify(&(so)->so_snd)) \
sowakeup((so), &(so)->so_snd); \
else \
SOCKBUF_UNLOCK(&(so)->so_snd); \
} while (0)
#define sowwakeup(so) do { \
if (sb_notify(&(so)->so_snd)) \
sowakeup((so), &(so)->so_snd); \
SOCKBUF_LOCK(&(so)->so_snd); \
sowwakeup_locked(so); \
} while (0)
/*
@ -412,21 +436,33 @@ struct uio;
int sockargs(struct mbuf **mp, caddr_t buf, int buflen, int type);
int getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len);
void sbappend(struct sockbuf *sb, struct mbuf *m);
void sbappend_locked(struct sockbuf *sb, struct mbuf *m);
void sbappendstream(struct sockbuf *sb, struct mbuf *m);
void sbappendstream_locked(struct sockbuf *sb, struct mbuf *m);
int sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
struct mbuf *m0, struct mbuf *control);
int sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
struct mbuf *m0, struct mbuf *control);
int sbappendcontrol(struct sockbuf *sb, struct mbuf *m0,
struct mbuf *control);
int sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
struct mbuf *control);
void sbappendrecord(struct sockbuf *sb, struct mbuf *m0);
void sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0);
void sbcheck(struct sockbuf *sb);
void sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n);
struct mbuf *
sbcreatecontrol(caddr_t p, int size, int type, int level);
void sbdrop(struct sockbuf *sb, int len);
void sbdrop_locked(struct sockbuf *sb, int len);
void sbdroprecord(struct sockbuf *sb);
void sbdroprecord_locked(struct sockbuf *sb);
void sbflush(struct sockbuf *sb);
void sbflush_locked(struct sockbuf *sb);
void sbinsertoob(struct sockbuf *sb, struct mbuf *m0);
void sbinsertoob_locked(struct sockbuf *sb, struct mbuf *m0);
void sbrelease(struct sockbuf *sb, struct socket *so);
void sbrelease_locked(struct sockbuf *sb, struct socket *so);
int sbreserve(struct sockbuf *sb, u_long cc, struct socket *so,
struct thread *td);
void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb);
@ -438,7 +474,9 @@ struct socket *soalloc(int mflags);
int socheckuid(struct socket *so, uid_t uid);
int sobind(struct socket *so, struct sockaddr *nam, struct thread *td);
void socantrcvmore(struct socket *so);
void socantrcvmore_locked(struct socket *so);
void socantsendmore(struct socket *so);
void socantsendmore_locked(struct socket *so);
int soclose(struct socket *so);
int soconnect(struct socket *so, struct sockaddr *nam, struct thread *td);
int soconnect2(struct socket *so1, struct socket *so2);