freebsd-skq/sys/sys/unpcb.h
Alan Somers c2090e73d7 Replace 4.4BSD Lite's unix domain socket backpressure hack with a cleaner
mechanism, based on the new SB_STOP sockbuf flag.  The old hack dynamically
changed the sending sockbuf's high water mark whenever adding or removing
data from the receiving sockbuf.  It worked for stream sockets, but it never
worked for SOCK_SEQPACKET sockets because of their atomic nature.  If the
sockbuf was partially full, it might return EMSGSIZE instead of blocking.

The new solution is based on DragonFlyBSD's fix from commit
3a6117bbe0ed6a87605c1e43e12a1438d8844380 on 2008-05-27.  It adds an SB_STOP
flag to sockbufs.  Whenever uipc_send surpasses the socket's size limit, it
sets SB_STOP on the sending sockbuf.  sbspace() will then return 0 for that
sockbuf, causing sosend_generic and friends to block.  uipc_rcvd will
likewise clear SB_STOP.  There are two fringe benefits: uipc_{send,rcvd} no
longer need to call chgsbsize() on every send and receive because they don't
change the sockbuf's high water mark.  Also, uipc_sense no longer needs to
acquire the UIPC linkage lock, because it's simpler to compute the
st_blksizes.

There is one drawback: since sbspace() will only ever return 0 or the
maximum, sosend_generic will allow the sockbuf to exceed its nominal maximum
size by at most one packet of size less than the max.  I don't think that's
a serious problem.  In fact, I'm not even positive that FreeBSD guarantees a
socket will always stay within its nominal size limit.

sys/sys/sockbuf.h
	Add the SB_STOP flag and adjust sbspace()

sys/sys/unpcb.h
	Delete the obsolete unp_cc and unp_mbcnt fields from struct unpcb.

sys/kern/uipc_usrreq.c
	Adjust uipc_rcvd, uipc_send, and uipc_sense to use the SB_STOP
	backpressure mechanism.  Remove obsolete unpcb fields from
	db_show_unpcb.

tests/sys/kern/unix_seqpacket_test.c
	Clear expected failures from ATF.

Obtained from:	DragonFly BSD
PR:		kern/185812
Reviewed by:	silence from freebsd-net@ and rwatson@
MFC after:	3 weeks
Sponsored by:	Spectra Logic Corporation
2014-03-13 18:42:12 +00:00

149 lines
5.6 KiB
C

/*-
* Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)unpcb.h 8.1 (Berkeley) 6/2/93
* $FreeBSD$
*/
#ifndef _SYS_UNPCB_H_
#define _SYS_UNPCB_H_
#include <sys/queue.h>
#include <sys/ucred.h>
/*
* Protocol control block for an active
* instance of a UNIX internal protocol.
*
* A socket may be associated with a vnode in the
* filesystem. If so, the unp_vnode pointer holds
* a reference count to this vnode, which should be irele'd
* when the socket goes away.
*
* A socket may be connected to another socket, in which
* case the control block of the socket to which it is connected
* is given by unp_conn.
*
* A socket may be referenced by a number of sockets (e.g. several
* sockets may be connected to a datagram socket.) These sockets
* are in a linked list starting with unp_refs, linked through
* unp_reflink and null-terminated. Note that a socket may be referenced
* by a number of other sockets and may also reference a socket (not
* necessarily one which is referencing it). This generates
* the need for unp_refs and unp_reflink to be separate fields.
*
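* Stream sockets used to keep copies of the receive sockbuf's sb_cc and
* sb_mbcnt so that changes in the sockbuf could be computed to modify
* back pressure on the sender accordingly. Those copies are no longer
* part of struct unpcb; back pressure is now applied through the SB_STOP
* sockbuf flag instead.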
*/
/* Type of the per-instance generation counter (see unp_gencnt). */
typedef	u_quad_t	unp_gen_t;

/* Declares struct unp_head, the list-head type for lists of unpcbs. */
LIST_HEAD(unp_head, unpcb);

/*
 * Control block for one UNIX-domain socket.
 *
 * struct xunpcb embeds a complete copy of this structure by value, so
 * any change to the member order or sizes here is also a change to the
 * layout of struct xunpcb.
 */
struct unpcb {
	/* Global linkage and back-pointers. */
	LIST_ENTRY(unpcb) unp_link;	/* entry on the list of all PCBs */
	struct socket	*unp_socket;	/* socket that owns this PCB */
	struct file	*unp_file;	/* file back-pointer, used by gc */

	/* Filesystem identity. */
	struct vnode	*unp_vnode;	/* vnode, if associated with a file */
	ino_t		unp_ino;	/* fake inode number */

	/* Connection and reference topology. */
	struct unpcb	*unp_conn;	/* PCB of the connected socket */
	struct unp_head	unp_refs;	/* list of sockets referencing us */
	LIST_ENTRY(unpcb) unp_reflink;	/* our entry on a unp_refs list */
	struct sockaddr_un *unp_addr;	/* address this socket is bound to */

	/*
	 * NOTE(review): presumably placeholders that keep the structure
	 * layout stable now that the old back-pressure counters are
	 * gone -- confirm before reusing them.
	 */
	int		reserved1;
	int		reserved2;

	/* State. */
	unp_gen_t	unp_gencnt;	/* generation count of this instance */
	short		unp_flags;	/* UNP_* flags */
	short		unp_gcflag;	/* garbage collector flags */
	struct xucred	unp_peercred;	/* peer credentials, if applicable */
	u_int		unp_refcount;	/* NOTE(review): presumably refs on this PCB */
	u_int		unp_msgcount;	/* references from message queue */
	struct mtx	unp_mtx;	/* mutex */
};
/*
* Flags in unp_flags.
*
* UNP_HAVEPC - indicates that the unp_peercred member is filled in
* and is really the credentials of the connected peer. This is used
* to determine whether the contents should be sent to the user or
* not.
*
* UNP_HAVEPCCACHED - indicates that the unp_peercred member is filled
* in, but does *not* contain the credentials of the connected peer
* (there may not even be a peer). This is set in unp_listen() when
* it fills in unp_peercred for later consumption by unp_connect().
*/
#define	UNP_HAVEPC		0x001	/* unp_peercred holds the connected peer's credentials */
#define	UNP_HAVEPCCACHED	0x002	/* unp_peercred filled in by unp_listen(); not the peer's */
#define	UNP_WANTCRED		0x004	/* credentials wanted */
#define	UNP_CONNWAIT		0x008	/* connect blocks until accepted */
/*
 * Garbage collector state bits.
 * NOTE(review): presumably kept in unp_gcflag -- confirm against the users.
 */
#define	UNPGC_REF	0x1	/* unpcb has external ref. */
#define	UNPGC_DEAD	0x2	/* unpcb might be dead. */
#define	UNPGC_SCANNED	0x4	/* has been scanned. */
/*
* These flags are used to handle non-atomicity in connect() and bind()
* operations on a socket: in particular, to avoid races between multiple
* threads or processes operating simultaneously on the same socket.
*/
#define	UNP_CONNECTING	0x010	/* a connect is currently in progress */
#define	UNP_BINDING	0x020	/* a bind is currently in progress */
/* Yield the so_pcb member of the socket pointed to by sop, cast to a unpcb pointer. */
#define	sotounpcb(sop)	((struct unpcb *)((sop)->so_pcb))
/* Hack alert -- this structure depends on <sys/socketvar.h>. */
#ifdef _SYS_SOCKETVAR_H_
/*
 * Bundles a copy of a unpcb with its own and its peer's bound addresses
 * and an xsocket.  The comment on xu_unpp names netstat and fstat as
 * consumers.
 *
 * Each address lives in a union with a 256-byte char array, so the union
 * occupies at least 256 bytes regardless of sizeof(struct sockaddr_un).
 */
struct xunpcb {
	size_t		xu_len;		/* length of this structure */
	struct unpcb	*xu_unpp;	/* to help netstat, fstat */
	struct unpcb	xu_unp;		/* our information */

	union {
		struct sockaddr_un xuu_addr;	/* our bound address */
		char		xu_dummy1[256];	/* size floor for the union */
	} xu_au;
#define	xu_addr	xu_au.xuu_addr

	union {
		struct sockaddr_un xuu_caddr;	/* their bound address */
		char		xu_dummy2[256];	/* size floor for the union */
	} xu_cau;
#define	xu_caddr	xu_cau.xuu_caddr

	struct xsocket	xu_socket;
	/* NOTE(review): presumably present only to force 64-bit alignment -- confirm. */
	u_quad_t	xu_alignment_hack;
};
/*
 * Length, count and generation record.
 * NOTE(review): the original carries no field comments; the notes below
 * are inferred from the member names and types -- confirm against the
 * code that fills this structure in.
 */
struct xunpgen {
	size_t		xug_len;	/* presumably length of this structure */
	u_int		xug_count;	/* presumably number of PCBs reported */
	unp_gen_t	xug_gen;	/* unpcb generation value (unp_gen_t) */
	so_gen_t	xug_sogen;	/* socket generation value (so_gen_t) */
};
#endif /* _SYS_SOCKETVAR_H_ */
#endif /* _SYS_UNPCB_H_ */