Load balance sockets with new SO_REUSEPORT_LB option
This patch adds a new socket option, SO_REUSEPORT_LB, which allows multiple programs or threads to bind to the same port; incoming connections are load balanced among them using a hash function. Most of the code was copied from a similar patch for DragonFly BSD. However, in DragonFly BSD, load balancing is a global on/off setting and cannot be set per socket. This patch allows for simultaneous use of both the current SO_REUSEPORT and the new SO_REUSEPORT_LB options on the same system. Required changes to structures: globally change so_options from a 16-bit to a 32-bit value to allow for more options; add a hash table in pcbinfo to hold all SO_REUSEPORT_LB sockets. Limitations: as in DragonFly BSD, a load balance group is limited to 256 pcbs (256 programs or threads sharing the same socket). Submitted by: Johannes Lundberg <johanlun0@gmail.com> Sponsored by: Limelight Networks Differential Revision: https://reviews.freebsd.org/D11003
This commit is contained in:
parent
2b071b580c
commit
bbf7d4dd03
@ -192,12 +192,12 @@ translator tcpsinfo_t < struct tcpcb *p > {
|
||||
tcps_rport = p == NULL ? 0 : ntohs(p->t_inpcb->inp_inc.inc_ie.ie_fport);
|
||||
tcps_laddr = p == NULL ? 0 :
|
||||
p->t_inpcb->inp_vflag == INP_IPV4 ?
|
||||
inet_ntoa(&p->t_inpcb->inp_inc.inc_ie.ie_dependladdr.ie46_local.ia46_addr4.s_addr) :
|
||||
inet_ntoa6(&p->t_inpcb->inp_inc.inc_ie.ie_dependladdr.ie6_local);
|
||||
inet_ntoa(&p->t_inpcb->inp_inc.inc_ie.ie_dependladdr.id46_addr.ia46_addr4.s_addr) :
|
||||
inet_ntoa6(&p->t_inpcb->inp_inc.inc_ie.ie_dependladdr.id6_addr);
|
||||
tcps_raddr = p == NULL ? 0 :
|
||||
p->t_inpcb->inp_vflag == INP_IPV4 ?
|
||||
inet_ntoa(&p->t_inpcb->inp_inc.inc_ie.ie_dependfaddr.ie46_foreign.ia46_addr4.s_addr) :
|
||||
inet_ntoa6(&p->t_inpcb->inp_inc.inc_ie.ie_dependfaddr.ie6_foreign);
|
||||
inet_ntoa(&p->t_inpcb->inp_inc.inc_ie.ie_dependfaddr.id46_addr.ia46_addr4.s_addr) :
|
||||
inet_ntoa6(&p->t_inpcb->inp_inc.inc_ie.ie_dependfaddr.id6_addr);
|
||||
tcps_state = p == NULL ? -1 : p->t_state;
|
||||
tcps_iss = p == NULL ? 0 : p->iss;
|
||||
tcps_irs = p == NULL ? 0 : p->irs;
|
||||
|
@ -77,7 +77,7 @@ db_print_sotype(short so_type)
|
||||
}
|
||||
|
||||
static void
|
||||
db_print_sooptions(short so_options)
|
||||
db_print_sooptions(int so_options)
|
||||
{
|
||||
int comma;
|
||||
|
||||
@ -122,6 +122,10 @@ db_print_sooptions(short so_options)
|
||||
db_printf("%sSO_REUSEPORT", comma ? ", " : "");
|
||||
comma = 1;
|
||||
}
|
||||
if (so_options & SO_REUSEPORT_LB) {
|
||||
db_printf("%sSO_REUSEPORT_LB", comma ? ", " : "");
|
||||
comma = 1;
|
||||
}
|
||||
if (so_options & SO_TIMESTAMP) {
|
||||
db_printf("%sSO_TIMESTAMP", comma ? ", " : "");
|
||||
comma = 1;
|
||||
|
@ -1056,6 +1056,100 @@ sofree(struct socket *so)
|
||||
sodealloc(so);
|
||||
}
|
||||
|
||||
/*
|
||||
* Let socket in same load balance group (same port and address)
|
||||
* inherit pending sockets of the closing socket.
|
||||
*
|
||||
* "so_inh" will inherit sockets from "so"
|
||||
*/
|
||||
void
|
||||
soinherit(struct socket *so, struct socket *so_inh)
|
||||
{
|
||||
TAILQ_HEAD(, socket) comp, incomp;
|
||||
struct socket *sp, *head, *head_inh;
|
||||
int qlen, incqlen;
|
||||
|
||||
KASSERT(so->so_options & SO_ACCEPTCONN,
|
||||
("so does not accept connection"));
|
||||
KASSERT(so_inh->so_options & SO_ACCEPTCONN,
|
||||
("so_inh does not accept connection"));
|
||||
|
||||
|
||||
restart:
|
||||
SOCK_LOCK(so);
|
||||
if ((head = so->so_listen) != NULL &&
|
||||
__predict_false(SOLISTEN_TRYLOCK(head) == 0)) {
|
||||
SOCK_UNLOCK(so);
|
||||
goto restart;
|
||||
}
|
||||
|
||||
restart_inh:
|
||||
SOCK_LOCK(so_inh);
|
||||
if ((head_inh = so_inh->so_listen) != NULL &&
|
||||
__predict_false(SOLISTEN_TRYLOCK(head_inh) == 0)) {
|
||||
SOCK_UNLOCK(so_inh);
|
||||
goto restart_inh;
|
||||
}
|
||||
|
||||
TAILQ_INIT(&comp);
|
||||
TAILQ_INIT(&incomp);
|
||||
|
||||
/*
|
||||
* Save completed queue and incompleted queue
|
||||
*/
|
||||
TAILQ_CONCAT(&comp, &so->sol_comp, so_list);
|
||||
qlen = so->sol_qlen;
|
||||
so->sol_qlen = 0;
|
||||
|
||||
TAILQ_CONCAT(&incomp, &so->sol_incomp, so_list);
|
||||
incqlen = so->sol_incqlen;
|
||||
so->sol_incqlen = 0;
|
||||
|
||||
/*
|
||||
* Append the saved completed queue and incompleted
|
||||
* queue to the socket inherits them.
|
||||
*
|
||||
* XXX
|
||||
* This may temporarily break the inheriting socket's
|
||||
* so_qlimit.
|
||||
*/
|
||||
TAILQ_FOREACH(sp, &comp, so_list) {
|
||||
refcount_acquire(&so_inh->so_count);
|
||||
sp->so_listen = so_inh;
|
||||
crfree(sp->so_cred);
|
||||
sp->so_cred = crhold(so_inh->so_cred);
|
||||
}
|
||||
|
||||
TAILQ_FOREACH(sp, &incomp, so_list) {
|
||||
refcount_acquire(&so_inh->so_count);
|
||||
sp->so_listen = so_inh;
|
||||
crfree(sp->so_cred);
|
||||
sp->so_cred = crhold(so_inh->so_cred);
|
||||
}
|
||||
|
||||
TAILQ_CONCAT(&so_inh->sol_comp, &comp, so_list);
|
||||
so_inh->sol_qlen += qlen;
|
||||
|
||||
TAILQ_CONCAT(&so_inh->sol_incomp, &incomp, so_list);
|
||||
so_inh->sol_incqlen += incqlen;
|
||||
|
||||
SOCK_UNLOCK(so);
|
||||
if(head != NULL)
|
||||
SOLISTEN_UNLOCK(head);
|
||||
|
||||
SOCK_UNLOCK(so_inh);
|
||||
if(head_inh != NULL) {
|
||||
if(qlen > 0) {
|
||||
/*
|
||||
* "New" connections have arrived
|
||||
*/
|
||||
solisten_wakeup(head_inh);
|
||||
} else {
|
||||
SOLISTEN_UNLOCK(head_inh);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Close a socket on last file table reference removal. Initiate disconnect
|
||||
* if connected. Free socket when disconnect complete.
|
||||
@ -2776,6 +2870,7 @@ sosetopt(struct socket *so, struct sockopt *sopt)
|
||||
case SO_BROADCAST:
|
||||
case SO_REUSEADDR:
|
||||
case SO_REUSEPORT:
|
||||
case SO_REUSEPORT_LB:
|
||||
case SO_OOBINLINE:
|
||||
case SO_TIMESTAMP:
|
||||
case SO_BINTIME:
|
||||
@ -2994,6 +3089,7 @@ sogetopt(struct socket *so, struct sockopt *sopt)
|
||||
case SO_KEEPALIVE:
|
||||
case SO_REUSEADDR:
|
||||
case SO_REUSEPORT:
|
||||
case SO_REUSEPORT_LB:
|
||||
case SO_BROADCAST:
|
||||
case SO_OOBINLINE:
|
||||
case SO_ACCEPTCONN:
|
||||
|
@ -108,6 +108,9 @@ __FBSDID("$FreeBSD$");
|
||||
|
||||
#include <security/mac/mac_framework.h>
|
||||
|
||||
#define INPCBLBGROUP_SIZMIN 8
|
||||
#define INPCBLBGROUP_SIZMAX 256
|
||||
|
||||
static struct callout ipport_tick_callout;
|
||||
|
||||
/*
|
||||
@ -217,6 +220,185 @@ SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomtime,
|
||||
* functions often modify hash chains or addresses in pcbs.
|
||||
*/
|
||||
|
||||
static struct inpcblbgroup *
|
||||
in_pcblbgroup_alloc(struct inpcblbgrouphead *hdr, u_char vflag,
|
||||
uint16_t port, const union in_dependaddr *addr, int size)
|
||||
{
|
||||
struct inpcblbgroup *grp;
|
||||
|
||||
size_t bytes = __offsetof(struct inpcblbgroup, il_inp[size]);
|
||||
grp = malloc(bytes, M_PCB, M_ZERO | M_NOWAIT);
|
||||
if(!grp)
|
||||
return NULL;
|
||||
grp->il_vflag = vflag;
|
||||
grp->il_lport = port;
|
||||
grp->il_dependladdr = *addr;
|
||||
grp->il_inpsiz = size;
|
||||
LIST_INSERT_HEAD(hdr, grp, il_list);
|
||||
|
||||
return grp;
|
||||
}
|
||||
|
||||
static void
|
||||
in_pcblbgroup_free(struct inpcblbgroup *grp)
|
||||
{
|
||||
LIST_REMOVE(grp, il_list);
|
||||
free(grp, M_TEMP);
|
||||
}
|
||||
|
||||
static struct inpcblbgroup *
|
||||
in_pcblbgroup_resize(struct inpcblbgrouphead *hdr,
|
||||
struct inpcblbgroup *old_grp, int size)
|
||||
{
|
||||
struct inpcblbgroup *grp;
|
||||
int i;
|
||||
|
||||
grp = in_pcblbgroup_alloc(hdr, old_grp->il_vflag,
|
||||
old_grp->il_lport, &old_grp->il_dependladdr, size);
|
||||
if(!grp)
|
||||
return NULL;
|
||||
|
||||
KASSERT(old_grp->il_inpcnt < grp->il_inpsiz,
|
||||
("invalid new local group size %d and old local group count %d",
|
||||
grp->il_inpsiz, old_grp->il_inpcnt));
|
||||
for (i = 0; i < old_grp->il_inpcnt; ++i)
|
||||
grp->il_inp[i] = old_grp->il_inp[i];
|
||||
grp->il_inpcnt = old_grp->il_inpcnt;
|
||||
|
||||
in_pcblbgroup_free(old_grp);
|
||||
|
||||
return grp;
|
||||
}
|
||||
|
||||
/*
|
||||
* Add PCB to lb group (load balance used by SO_REUSEPORT_LB)
|
||||
*/
|
||||
static int
|
||||
in_pcbinslbgrouphash(struct inpcb *inp, struct inpcbinfo *pcbinfo)
|
||||
{
|
||||
struct inpcblbgrouphead *hdr;
|
||||
struct inpcblbgroup *grp;
|
||||
|
||||
uint16_t hashmask = pcbinfo->ipi_lbgrouphashmask;
|
||||
uint16_t lport = inp->inp_lport;
|
||||
uint32_t group_index = INP_PCBLBGROUP_PORTHASH(lport, hashmask);
|
||||
|
||||
hdr = &pcbinfo->ipi_lbgrouphashbase[group_index];
|
||||
|
||||
struct ucred *cred;
|
||||
|
||||
if (pcbinfo->ipi_lbgrouphashbase == NULL)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* don't allow jailed socket to join local group
|
||||
*/
|
||||
if (inp->inp_socket != NULL)
|
||||
cred = inp->inp_socket->so_cred;
|
||||
else
|
||||
cred = NULL;
|
||||
if (cred != NULL && jailed(cred))
|
||||
return 0;
|
||||
|
||||
#ifdef INET6
|
||||
/*
|
||||
* don't allow IPv4 mapped INET6 wild socket
|
||||
*/
|
||||
if ((inp->inp_vflag & INP_IPV4) &&
|
||||
inp->inp_laddr.s_addr == INADDR_ANY &&
|
||||
INP_CHECK_SOCKAF(inp->inp_socket, AF_INET6)) {
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
hdr = &pcbinfo->ipi_lbgrouphashbase[
|
||||
INP_PCBLBGROUP_PORTHASH(inp->inp_lport, pcbinfo->ipi_lbgrouphashmask)];
|
||||
|
||||
LIST_FOREACH(grp, hdr, il_list) {
|
||||
if (grp->il_vflag == inp->inp_vflag &&
|
||||
grp->il_lport == inp->inp_lport &&
|
||||
memcmp(&grp->il_dependladdr,
|
||||
&inp->inp_inc.inc_ie.ie_dependladdr,
|
||||
sizeof(grp->il_dependladdr)) == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (grp == NULL) {
|
||||
/* Create new load balance group */
|
||||
grp = in_pcblbgroup_alloc(hdr, inp->inp_vflag,
|
||||
inp->inp_lport, &inp->inp_inc.inc_ie.ie_dependladdr,
|
||||
INPCBLBGROUP_SIZMIN);
|
||||
if(!grp)
|
||||
return (ENOBUFS);
|
||||
} else if (grp->il_inpcnt == grp->il_inpsiz) {
|
||||
if (grp->il_inpsiz >= INPCBLBGROUP_SIZMAX) {
|
||||
static int limit_logged = 0;
|
||||
|
||||
if (!limit_logged) {
|
||||
limit_logged = 1;
|
||||
printf("lb group port %d, "
|
||||
"limit reached\n", ntohs(grp->il_lport));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Expand this local group */
|
||||
grp = in_pcblbgroup_resize(hdr, grp, grp->il_inpsiz * 2);
|
||||
if(!grp)
|
||||
return (ENOBUFS);
|
||||
}
|
||||
|
||||
KASSERT(grp->il_inpcnt < grp->il_inpsiz,
|
||||
("invalid local group size %d and count %d",
|
||||
grp->il_inpsiz, grp->il_inpcnt));
|
||||
|
||||
grp->il_inp[grp->il_inpcnt] = inp;
|
||||
grp->il_inpcnt++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
in_pcbremlbgrouphash(struct inpcb *inp, struct inpcbinfo *pcbinfo)
|
||||
{
|
||||
struct inpcblbgrouphead *hdr;
|
||||
struct inpcblbgroup *grp;
|
||||
|
||||
if (pcbinfo->ipi_lbgrouphashbase == NULL)
|
||||
return;
|
||||
|
||||
hdr = &pcbinfo->ipi_lbgrouphashbase[
|
||||
INP_PCBLBGROUP_PORTHASH(inp->inp_lport, pcbinfo->ipi_lbgrouphashmask)];
|
||||
|
||||
LIST_FOREACH(grp, hdr, il_list) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < grp->il_inpcnt; ++i) {
|
||||
if (grp->il_inp[i] != inp)
|
||||
continue;
|
||||
|
||||
if (grp->il_inpcnt == 1) {
|
||||
/* Free this local group */
|
||||
in_pcblbgroup_free(grp);
|
||||
} else {
|
||||
/* Pull up inpcbs */
|
||||
for (; i + 1 < grp->il_inpcnt; ++i)
|
||||
grp->il_inp[i] = grp->il_inp[i + 1];
|
||||
grp->il_inpcnt--;
|
||||
|
||||
if (grp->il_inpsiz > INPCBLBGROUP_SIZMIN &&
|
||||
grp->il_inpcnt <= (grp->il_inpsiz / 4)) {
|
||||
/* Shrink this local group */
|
||||
struct inpcblbgroup *new_grp =
|
||||
in_pcblbgroup_resize(hdr, grp, grp->il_inpsiz / 2);
|
||||
if(new_grp)
|
||||
grp = new_grp;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Different protocols initialize their inpcbs differently - giving
|
||||
* different name to the lock. But they all are disposed the same.
|
||||
@ -252,6 +434,8 @@ in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
|
||||
&pcbinfo->ipi_hashmask);
|
||||
pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB,
|
||||
&pcbinfo->ipi_porthashmask);
|
||||
pcbinfo->ipi_lbgrouphashbase = hashinit(hash_nelements, M_PCB,
|
||||
&pcbinfo->ipi_lbgrouphashmask);
|
||||
#ifdef PCBGROUP
|
||||
in_pcbgroup_init(pcbinfo, hashfields, hash_nelements);
|
||||
#endif
|
||||
@ -275,6 +459,8 @@ in_pcbinfo_destroy(struct inpcbinfo *pcbinfo)
|
||||
hashdestroy(pcbinfo->ipi_hashbase, M_PCB, pcbinfo->ipi_hashmask);
|
||||
hashdestroy(pcbinfo->ipi_porthashbase, M_PCB,
|
||||
pcbinfo->ipi_porthashmask);
|
||||
hashdestroy(pcbinfo->ipi_lbgrouphashbase, M_PCB,
|
||||
pcbinfo->ipi_lbgrouphashmask);
|
||||
#ifdef PCBGROUP
|
||||
in_pcbgroup_destroy(pcbinfo);
|
||||
#endif
|
||||
@ -513,13 +699,15 @@ in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp,
|
||||
/*
|
||||
* Return cached socket options.
|
||||
*/
|
||||
short
|
||||
int
|
||||
inp_so_options(const struct inpcb *inp)
|
||||
{
|
||||
short so_options;
|
||||
int so_options;
|
||||
|
||||
so_options = 0;
|
||||
|
||||
if ((inp->inp_flags2 & INP_REUSEPORT_LB) != 0)
|
||||
so_options |= SO_REUSEPORT_LB;
|
||||
if ((inp->inp_flags2 & INP_REUSEPORT) != 0)
|
||||
so_options |= SO_REUSEPORT;
|
||||
if ((inp->inp_flags2 & INP_REUSEADDR) != 0)
|
||||
@ -580,6 +768,12 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
|
||||
int lookupflags = 0, reuseport = (so->so_options & SO_REUSEPORT);
|
||||
int error;
|
||||
|
||||
/*
|
||||
* XXX Maybe we could let SO_REUSEPORT_LB set SO_REUSEPORT bit here
|
||||
* so that we don't have to add to the (already messy) code below
|
||||
*/
|
||||
int reuseport_lb = (so->so_options & SO_REUSEPORT_LB);
|
||||
|
||||
/*
|
||||
* No state changes, so read locks are sufficient here.
|
||||
*/
|
||||
@ -591,7 +785,7 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
|
||||
laddr.s_addr = *laddrp;
|
||||
if (nam != NULL && laddr.s_addr != INADDR_ANY)
|
||||
return (EINVAL);
|
||||
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
|
||||
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0)
|
||||
lookupflags = INPLOOKUP_WILDCARD;
|
||||
if (nam == NULL) {
|
||||
if ((error = prison_local_ip4(cred, &laddr)) != 0)
|
||||
@ -628,6 +822,10 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
|
||||
*/
|
||||
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0)
|
||||
reuseport = SO_REUSEADDR|SO_REUSEPORT;
|
||||
// XXX: How to deal with SO_REUSEPORT_LB here?
|
||||
// Added equivalent treatment as SO_REUSEPORT here for now
|
||||
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT_LB)) != 0)
|
||||
reuseport_lb = SO_REUSEADDR|SO_REUSEPORT_LB;
|
||||
} else if (sin->sin_addr.s_addr != INADDR_ANY) {
|
||||
sin->sin_port = 0; /* yech... */
|
||||
bzero(&sin->sin_zero, sizeof(sin->sin_zero));
|
||||
@ -667,7 +865,8 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
|
||||
ntohl(t->inp_faddr.s_addr) == INADDR_ANY) &&
|
||||
(ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
|
||||
ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
|
||||
(t->inp_flags2 & INP_REUSEPORT) == 0) &&
|
||||
(t->inp_flags2 & INP_REUSEPORT) ||
|
||||
(t->inp_flags2 & INP_REUSEPORT_LB) == 0) &&
|
||||
(inp->inp_cred->cr_uid !=
|
||||
t->inp_cred->cr_uid))
|
||||
return (EADDRINUSE);
|
||||
@ -692,11 +891,14 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
|
||||
*/
|
||||
tw = intotw(t);
|
||||
if (tw == NULL ||
|
||||
(reuseport & tw->tw_so_options) == 0)
|
||||
((reuseport & tw->tw_so_options) == 0 &&
|
||||
(reuseport_lb & tw->tw_so_options) == 0)) {
|
||||
return (EADDRINUSE);
|
||||
}
|
||||
} else if (t &&
|
||||
((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
|
||||
(reuseport & inp_so_options(t)) == 0) {
|
||||
(reuseport & inp_so_options(t)) == 0 &&
|
||||
(reuseport_lb & inp_so_options(t)) == 0) {
|
||||
#ifdef INET6
|
||||
if (ntohl(sin->sin_addr.s_addr) !=
|
||||
INADDR_ANY ||
|
||||
@ -1409,6 +1611,7 @@ in_pcbdrop(struct inpcb *inp)
|
||||
struct inpcbport *phd = inp->inp_phd;
|
||||
|
||||
INP_HASH_WLOCK(inp->inp_pcbinfo);
|
||||
in_pcbremlbgrouphash(inp, inp->inp_pcbinfo);
|
||||
LIST_REMOVE(inp, inp_hash);
|
||||
LIST_REMOVE(inp, inp_portlist);
|
||||
if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
|
||||
@ -1669,6 +1872,98 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
|
||||
}
|
||||
#undef INP_LOOKUP_MAPPED_PCB_COST
|
||||
|
||||
struct inpcb *
|
||||
in_pcblookup_lbgroup_last(const struct inpcb *inp)
|
||||
{
|
||||
const struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
|
||||
const struct inpcblbgrouphead *hdr;
|
||||
const struct inpcblbgroup *grp;
|
||||
int i;
|
||||
|
||||
if (pcbinfo->ipi_lbgrouphashbase == NULL)
|
||||
return NULL;
|
||||
|
||||
hdr = &pcbinfo->ipi_lbgrouphashbase[
|
||||
INP_PCBLBGROUP_PORTHASH(inp->inp_lport, pcbinfo->ipi_lbgrouphashmask)];
|
||||
|
||||
LIST_FOREACH(grp, hdr, il_list) {
|
||||
if (grp->il_vflag == inp->inp_vflag &&
|
||||
grp->il_lport == inp->inp_lport &&
|
||||
memcmp(&grp->il_dependladdr,
|
||||
&inp->inp_inc.inc_ie.ie_dependladdr,
|
||||
sizeof(grp->il_dependladdr)) == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (grp == NULL || grp->il_inpcnt == 1)
|
||||
return NULL;
|
||||
|
||||
KASSERT(grp->il_inpcnt >= 2,
|
||||
("invalid lbgroup inp count %d", grp->il_inpcnt));
|
||||
for (i = 0; i < grp->il_inpcnt; ++i) {
|
||||
if (grp->il_inp[i] == inp) {
|
||||
int last = grp->il_inpcnt - 1;
|
||||
|
||||
if (i == last)
|
||||
last = grp->il_inpcnt - 2;
|
||||
return grp->il_inp[last];
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct inpcb *
|
||||
in_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
|
||||
const struct in_addr *laddr, uint16_t lport, const struct in_addr *faddr,
|
||||
uint16_t fport, int lookupflags)
|
||||
{
|
||||
struct inpcb *local_wild = NULL;
|
||||
const struct inpcblbgrouphead *hdr;
|
||||
struct inpcblbgroup *grp;
|
||||
struct inpcblbgroup *grp_local_wild;
|
||||
|
||||
hdr = &pcbinfo->ipi_lbgrouphashbase[
|
||||
INP_PCBLBGROUP_PORTHASH(lport, pcbinfo->ipi_lbgrouphashmask)];
|
||||
|
||||
/*
|
||||
* Order of socket selection:
|
||||
* 1. non-wild.
|
||||
* 2. wild (if lookupflags contains INPLOOKUP_WILDCARD).
|
||||
*
|
||||
* NOTE:
|
||||
* - Load balanced group does not contain jailed sockets
|
||||
* - Load balanced group does not contain IPv4 mapped INET6 wild sockets
|
||||
*/
|
||||
LIST_FOREACH(grp, hdr, il_list) {
|
||||
#ifdef INET6
|
||||
if (!(grp->il_vflag & INP_IPV4))
|
||||
continue;
|
||||
#endif
|
||||
|
||||
if (grp->il_lport == lport) {
|
||||
|
||||
uint32_t idx = 0;
|
||||
int pkt_hash = INP_PCBLBGROUP_PKTHASH(faddr->s_addr, lport, fport);
|
||||
|
||||
idx = pkt_hash % grp->il_inpcnt;
|
||||
|
||||
if (grp->il_laddr.s_addr == laddr->s_addr) {
|
||||
return grp->il_inp[idx];
|
||||
} else {
|
||||
if (grp->il_laddr.s_addr == INADDR_ANY &&
|
||||
(lookupflags & INPLOOKUP_WILDCARD)) {
|
||||
local_wild = grp->il_inp[idx];
|
||||
grp_local_wild = grp;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (local_wild != NULL) {
|
||||
return local_wild;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#ifdef PCBGROUP
|
||||
/*
|
||||
* Lookup PCB in hash list, using pcbgroup tables.
|
||||
@ -1947,6 +2242,18 @@ in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr,
|
||||
if (tmpinp != NULL)
|
||||
return (tmpinp);
|
||||
|
||||
/*
|
||||
* Then look in lb group (for wildcard match)
|
||||
*/
|
||||
if (pcbinfo->ipi_lbgrouphashbase != NULL &&
|
||||
(lookupflags & INPLOOKUP_WILDCARD)) {
|
||||
inp = in_pcblookup_lbgroup(pcbinfo, &laddr, lport, &faddr, fport,
|
||||
lookupflags);
|
||||
if (inp != NULL) {
|
||||
return inp;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Then look for a wildcard match, if requested.
|
||||
*/
|
||||
@ -2164,6 +2471,7 @@ in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update)
|
||||
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
|
||||
struct inpcbport *phd;
|
||||
u_int32_t hashkey_faddr;
|
||||
int so_options;
|
||||
|
||||
INP_WLOCK_ASSERT(inp);
|
||||
INP_HASH_WLOCK_ASSERT(pcbinfo);
|
||||
@ -2184,6 +2492,20 @@ in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update)
|
||||
pcbporthash = &pcbinfo->ipi_porthashbase[
|
||||
INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)];
|
||||
|
||||
|
||||
/*
|
||||
* Add entry in lb group
|
||||
* Only do this if SO_REUSEPORT_LB is set
|
||||
*/
|
||||
so_options = inp_so_options(inp);
|
||||
if(so_options & SO_REUSEPORT_LB) {
|
||||
int ret = in_pcbinslbgrouphash(inp, pcbinfo);
|
||||
if(ret) {
|
||||
// pcb lb group malloc fail (ret=ENOBUFS)
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Go through port list and look for a head for this lport.
|
||||
*/
|
||||
@ -2310,6 +2632,10 @@ in_pcbremlists(struct inpcb *inp)
|
||||
struct inpcbport *phd = inp->inp_phd;
|
||||
|
||||
INP_HASH_WLOCK(pcbinfo);
|
||||
|
||||
// XXX Only do if SO_REUSEPORT_LB set?
|
||||
in_pcbremlbgrouphash(inp, pcbinfo);
|
||||
|
||||
LIST_REMOVE(inp, inp_hash);
|
||||
LIST_REMOVE(inp, inp_portlist);
|
||||
if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
|
||||
|
@ -78,6 +78,11 @@ struct in_addr_4in6 {
|
||||
struct in_addr ia46_addr4;
|
||||
};
|
||||
|
||||
union in_dependaddr {
|
||||
struct in_addr_4in6 id46_addr;
|
||||
struct in6_addr id6_addr;
|
||||
};
|
||||
|
||||
/*
|
||||
* NOTE: ipv6 addrs should be 64-bit aligned, per RFC 2553. in_conninfo has
|
||||
* some extra padding to accomplish this.
|
||||
@ -88,22 +93,14 @@ struct in_endpoints {
|
||||
u_int16_t ie_fport; /* foreign port */
|
||||
u_int16_t ie_lport; /* local port */
|
||||
/* protocol dependent part, local and foreign addr */
|
||||
union {
|
||||
/* foreign host table entry */
|
||||
struct in_addr_4in6 ie46_foreign;
|
||||
struct in6_addr ie6_foreign;
|
||||
} ie_dependfaddr;
|
||||
union {
|
||||
/* local host table entry */
|
||||
struct in_addr_4in6 ie46_local;
|
||||
struct in6_addr ie6_local;
|
||||
} ie_dependladdr;
|
||||
union in_dependaddr ie_dependfaddr; /* foreign host table entry */
|
||||
union in_dependaddr ie_dependladdr; /* local host table entry */
|
||||
#define ie_faddr ie_dependfaddr.id46_addr.ia46_addr4
|
||||
#define ie_laddr ie_dependladdr.id46_addr.ia46_addr4
|
||||
#define ie6_faddr ie_dependfaddr.id6_addr
|
||||
#define ie6_laddr ie_dependladdr.id6_addr
|
||||
u_int32_t ie6_zoneid; /* scope zone id */
|
||||
};
|
||||
#define ie_faddr ie_dependfaddr.ie46_foreign.ia46_addr4
|
||||
#define ie_laddr ie_dependladdr.ie46_local.ia46_addr4
|
||||
#define ie6_faddr ie_dependfaddr.ie6_foreign
|
||||
#define ie6_laddr ie_dependladdr.ie6_local
|
||||
|
||||
/*
|
||||
* XXX The defines for inc_* are hacks and should be changed to direct
|
||||
@ -407,6 +404,21 @@ struct inpcbport {
|
||||
u_short phd_port;
|
||||
};
|
||||
|
||||
struct inpcblbgroup {
|
||||
LIST_ENTRY(inpcblbgroup) il_list;
|
||||
uint16_t il_lport;
|
||||
u_char il_vflag;
|
||||
u_char il_pad;
|
||||
uint32_t il_pad2;
|
||||
union in_dependaddr il_dependladdr;
|
||||
#define il_laddr il_dependladdr.id46_addr.ia46_addr4
|
||||
#define il6_laddr il_dependladdr.id6_addr
|
||||
uint32_t il_inpsiz; /* size of il_inp[] */
|
||||
uint32_t il_inpcnt; /* # of elem in il_inp[] */
|
||||
struct inpcb *il_inp[];
|
||||
};
|
||||
LIST_HEAD(inpcblbgrouphead, inpcblbgroup);
|
||||
|
||||
/*-
|
||||
* Global data structure for each high-level protocol (UDP, TCP, ...) in both
|
||||
* IPv4 and IPv6. Holds inpcb lists and information for managing them.
|
||||
@ -499,6 +511,13 @@ struct inpcbinfo {
|
||||
struct inpcbhead *ipi_wildbase; /* (p) */
|
||||
u_long ipi_wildmask; /* (p) */
|
||||
|
||||
/*
|
||||
* Load balanced group used by the SO_REUSEPORT_LB option,
|
||||
* hashed by local address and local port.
|
||||
*/
|
||||
struct inpcblbgrouphead *ipi_lbgrouphashbase;
|
||||
u_long ipi_lbgrouphashmask;
|
||||
|
||||
/*
|
||||
* Pointer to network stack instance
|
||||
*/
|
||||
@ -585,7 +604,7 @@ struct tcpcb *
|
||||
inp_inpcbtotcpcb(struct inpcb *inp);
|
||||
void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
|
||||
uint32_t *faddr, uint16_t *fp);
|
||||
short inp_so_options(const struct inpcb *inp);
|
||||
int inp_so_options(const struct inpcb *inp);
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
@ -648,6 +667,10 @@ short inp_so_options(const struct inpcb *inp);
|
||||
(((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask))
|
||||
#define INP_PCBPORTHASH(lport, mask) \
|
||||
(ntohs((lport)) & (mask))
|
||||
#define INP_PCBLBGROUP_PORTHASH(lport, mask) \
|
||||
(ntohs((lport)) & (mask))
|
||||
#define INP_PCBLBGROUP_PKTHASH(faddr, lport, fport) \
|
||||
((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport)))
|
||||
#define INP6_PCBHASHKEY(faddr) ((faddr)->s6_addr32[3])
|
||||
|
||||
/*
|
||||
@ -716,6 +739,7 @@ short inp_so_options(const struct inpcb *inp);
|
||||
#define INP_RATE_LIMIT_CHANGED 0x00000400 /* rate limit needs attention */
|
||||
#define INP_ORIGDSTADDR 0x00000800 /* receive IP dst address/port */
|
||||
#define INP_CANNOT_DO_ECN 0x00001000 /* The stack does not do ECN */
|
||||
#define INP_REUSEPORT_LB 0x00002000 /* SO_REUSEPORT_LB option is set */
|
||||
|
||||
/*
|
||||
* Flags passed to in_pcblookup*() functions.
|
||||
@ -818,6 +842,8 @@ struct inpcb *
|
||||
struct inpcb *
|
||||
in_pcblookup(struct inpcbinfo *, struct in_addr, u_int,
|
||||
struct in_addr, u_int, int, struct ifnet *);
|
||||
struct inpcb *
|
||||
in_pcblookup_lbgroup_last(const struct inpcb *inp);
|
||||
struct inpcb *
|
||||
in_pcblookup_mbuf(struct inpcbinfo *, struct in_addr, u_int,
|
||||
struct in_addr, u_int, int, struct ifnet *, struct mbuf *);
|
||||
|
@ -986,6 +986,15 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
|
||||
INP_WUNLOCK(inp);
|
||||
error = 0;
|
||||
break;
|
||||
case SO_REUSEPORT_LB:
|
||||
INP_WLOCK(inp);
|
||||
if ((so->so_options & SO_REUSEPORT_LB) != 0)
|
||||
inp->inp_flags2 |= INP_REUSEPORT_LB;
|
||||
else
|
||||
inp->inp_flags2 &= ~INP_REUSEPORT_LB;
|
||||
INP_WUNLOCK(inp);
|
||||
error = 0;
|
||||
break;
|
||||
case SO_SETFIB:
|
||||
INP_WLOCK(inp);
|
||||
inp->inp_inc.inc_fibnum = so->so_fibnum;
|
||||
|
@ -1956,10 +1956,28 @@ tcp_close(struct tcpcb *tp)
|
||||
{
|
||||
struct inpcb *inp = tp->t_inpcb;
|
||||
struct socket *so;
|
||||
struct inpcb *inp_inh = NULL;
|
||||
int listen = tp->t_state & TCPS_LISTEN;
|
||||
|
||||
INP_INFO_LOCK_ASSERT(&V_tcbinfo);
|
||||
INP_WLOCK_ASSERT(inp);
|
||||
|
||||
if (listen) {
|
||||
/*
|
||||
* Pending socket/syncache inheritance
|
||||
*
|
||||
* If this is a listen(2) socket, find another listen(2)
|
||||
* socket in the same local group, which could inherit
|
||||
* the syncache and sockets pending on the completion
|
||||
* and incompletion queues.
|
||||
*
|
||||
* NOTE:
|
||||
* Currently the inheritance could only happen on the
|
||||
* listen(2) sockets with SO_REUSEPORT_LB set.
|
||||
*/
|
||||
inp_inh = in_pcblookup_lbgroup_last(inp);
|
||||
}
|
||||
|
||||
#ifdef TCP_OFFLOAD
|
||||
if (tp->t_state == TCPS_LISTEN)
|
||||
tcp_offload_listen_stop(tp);
|
||||
@ -1979,7 +1997,16 @@ tcp_close(struct tcpcb *tp)
|
||||
tcp_state_change(tp, TCPS_CLOSED);
|
||||
KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL"));
|
||||
so = inp->inp_socket;
|
||||
|
||||
soisdisconnected(so);
|
||||
|
||||
if(listen)
|
||||
{
|
||||
if(inp_inh != NULL && inp_inh->inp_socket != NULL) {
|
||||
soinherit(so, inp_inh->inp_socket);
|
||||
}
|
||||
}
|
||||
|
||||
if (inp->inp_flags & INP_SOCKREF) {
|
||||
KASSERT(so->so_state & SS_PROTOREF,
|
||||
("tcp_close: !SS_PROTOREF"));
|
||||
|
@ -612,7 +612,7 @@ udp_input(struct mbuf **mp, int *offp, int proto)
|
||||
* will never clear these options after setting them.
|
||||
*/
|
||||
if ((last->inp_socket->so_options &
|
||||
(SO_REUSEPORT|SO_REUSEADDR)) == 0)
|
||||
(SO_REUSEPORT|SO_REUSEPORT_LB|SO_REUSEADDR)) == 0)
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -125,6 +125,12 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
|
||||
int error, lookupflags = 0;
|
||||
int reuseport = (so->so_options & SO_REUSEPORT);
|
||||
|
||||
/*
|
||||
* XXX Maybe we could let SO_REUSEPORT_LB set SO_REUSEPORT bit here
|
||||
* so that we don't have to add to the (already messy) code below
|
||||
*/
|
||||
int reuseport_lb = (so->so_options & SO_REUSEPORT_LB);
|
||||
|
||||
INP_WLOCK_ASSERT(inp);
|
||||
INP_HASH_WLOCK_ASSERT(pcbinfo);
|
||||
|
||||
@ -132,7 +138,7 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
|
||||
return (EADDRNOTAVAIL);
|
||||
if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
|
||||
return (EINVAL);
|
||||
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
|
||||
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0)
|
||||
lookupflags = INPLOOKUP_WILDCARD;
|
||||
if (nam == NULL) {
|
||||
if ((error = prison_local_ip6(cred, &inp->in6p_laddr,
|
||||
@ -166,6 +172,10 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
|
||||
*/
|
||||
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0)
|
||||
reuseport = SO_REUSEADDR|SO_REUSEPORT;
|
||||
// XXX: How to deal with SO_REUSEPORT_LB here?
|
||||
// Added equivalent treatment as SO_REUSEPORT here for now
|
||||
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT_LB)) != 0)
|
||||
reuseport_lb = SO_REUSEADDR|SO_REUSEPORT_LB;
|
||||
} else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
|
||||
struct ifaddr *ifa;
|
||||
|
||||
@ -214,7 +224,8 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
|
||||
IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) &&
|
||||
(!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
|
||||
!IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) ||
|
||||
(t->inp_flags2 & INP_REUSEPORT) == 0) &&
|
||||
(t->inp_flags2 & INP_REUSEPORT) ||
|
||||
(t->inp_flags2 & INP_REUSEPORT_LB) == 0) &&
|
||||
(inp->inp_cred->cr_uid !=
|
||||
t->inp_cred->cr_uid))
|
||||
return (EADDRINUSE);
|
||||
@ -264,9 +275,11 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
|
||||
*/
|
||||
tw = intotw(t);
|
||||
if (tw == NULL ||
|
||||
(reuseport & tw->tw_so_options) == 0)
|
||||
((reuseport & tw->tw_so_options) == 0 &&
|
||||
(reuseport_lb & tw->tw_so_options) == 0))
|
||||
return (EADDRINUSE);
|
||||
} else if (t && (reuseport & inp_so_options(t)) == 0) {
|
||||
} else if (t && (reuseport & inp_so_options(t)) == 0 &&
|
||||
(reuseport_lb & inp_so_options(t)) == 0) {
|
||||
return (EADDRINUSE);
|
||||
}
|
||||
#ifdef INET
|
||||
@ -282,6 +295,7 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
|
||||
if (tw == NULL)
|
||||
return (EADDRINUSE);
|
||||
if ((reuseport & tw->tw_so_options) == 0
|
||||
&& (reuseport_lb & tw->tw_so_options) == 0
|
||||
&& (ntohl(t->inp_laddr.s_addr) !=
|
||||
INADDR_ANY || ((inp->inp_vflag &
|
||||
INP_IPV6PROTO) ==
|
||||
@ -289,10 +303,12 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
|
||||
return (EADDRINUSE);
|
||||
} else if (t &&
|
||||
(reuseport & inp_so_options(t)) == 0 &&
|
||||
(reuseport_lb & inp_so_options(t)) == 0 &&
|
||||
(ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
|
||||
(t->inp_vflag & INP_IPV6PROTO) != 0))
|
||||
(t->inp_vflag & INP_IPV6PROTO) != 0)) {
|
||||
return (EADDRINUSE);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
inp->in6p_laddr = sin6->sin6_addr;
|
||||
@ -856,6 +872,54 @@ in6_rtchange(struct inpcb *inp, int errno)
|
||||
return inp;
|
||||
}
|
||||
|
||||
static struct inpcb *
|
||||
in6_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
|
||||
const struct in6_addr *laddr, uint16_t lport, const struct in6_addr *faddr,
|
||||
uint16_t fport, int lookupflags)
|
||||
{
|
||||
struct inpcb *local_wild = NULL;
|
||||
const struct inpcblbgrouphead *hdr;
|
||||
struct inpcblbgroup *grp;
|
||||
struct inpcblbgroup *grp_local_wild;
|
||||
|
||||
hdr = &pcbinfo->ipi_lbgrouphashbase[
|
||||
INP_PCBLBGROUP_PORTHASH(lport, pcbinfo->ipi_lbgrouphashmask)];
|
||||
|
||||
/*
|
||||
* Order of socket selection:
|
||||
* 1. non-wild.
|
||||
* 2. wild (if lookupflags contains INPLOOKUP_WILDCARD).
|
||||
*
|
||||
* NOTE:
|
||||
* - Load balanced group does not contain jailed sockets
|
||||
* - Load balanced does not contain IPv4 mapped INET6 wild sockets
|
||||
*/
|
||||
LIST_FOREACH(grp, hdr, il_list) {
|
||||
|
||||
if (grp->il_lport == lport) {
|
||||
uint32_t idx = 0;
|
||||
int pkt_hash = INP_PCBLBGROUP_PKTHASH(
|
||||
INP6_PCBHASHKEY(faddr), lport, fport);
|
||||
|
||||
idx = pkt_hash % grp->il_inpcnt;
|
||||
|
||||
if (IN6_ARE_ADDR_EQUAL(&grp->il6_laddr, laddr)) {
|
||||
return grp->il_inp[idx];
|
||||
} else {
|
||||
if (IN6_IS_ADDR_UNSPECIFIED(&grp->il6_laddr) &&
|
||||
(lookupflags & INPLOOKUP_WILDCARD)) {
|
||||
local_wild = grp->il_inp[idx];
|
||||
grp_local_wild = grp;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (local_wild != NULL) {
|
||||
return local_wild;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#ifdef PCBGROUP
|
||||
/*
|
||||
* Lookup PCB in hash list, using pcbgroup tables.
|
||||
@ -1057,6 +1121,8 @@ found:
|
||||
}
|
||||
#endif /* PCBGROUP */
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Lookup PCB in hash list.
|
||||
*/
|
||||
@ -1102,6 +1168,18 @@ in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
|
||||
if (tmpinp != NULL)
|
||||
return (tmpinp);
|
||||
|
||||
/*
|
||||
* Then look in lb group (for wildcard match)
|
||||
*/
|
||||
if (pcbinfo->ipi_lbgrouphashbase != NULL &&
|
||||
(lookupflags & INPLOOKUP_WILDCARD)) {
|
||||
inp = in6_pcblookup_lbgroup(pcbinfo, laddr, lport, faddr,
|
||||
fport, lookupflags);
|
||||
if (inp != NULL) {
|
||||
return inp;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Then look for a wildcard match, if requested.
|
||||
*/
|
||||
|
@ -973,7 +973,7 @@ in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred)
|
||||
return(error);
|
||||
|
||||
/* XXX: this is redundant when called from in6_pcbbind */
|
||||
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
|
||||
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0)
|
||||
lookupflags = INPLOOKUP_WILDCARD;
|
||||
|
||||
inp->inp_flags |= INP_ANONPORT;
|
||||
|
@ -1454,6 +1454,15 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
|
||||
INP_WUNLOCK(in6p);
|
||||
error = 0;
|
||||
break;
|
||||
case SO_REUSEPORT_LB:
|
||||
INP_WLOCK(in6p);
|
||||
if ((so->so_options & SO_REUSEPORT_LB) != 0)
|
||||
in6p->inp_flags2 |= INP_REUSEPORT_LB;
|
||||
else
|
||||
in6p->inp_flags2 &= ~INP_REUSEPORT_LB;
|
||||
INP_WUNLOCK(in6p);
|
||||
error = 0;
|
||||
break;
|
||||
case SO_SETFIB:
|
||||
INP_WLOCK(in6p);
|
||||
in6p->inp_inc.inc_fibnum = so->so_fibnum;
|
||||
|
@ -399,7 +399,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
|
||||
* will never clear these options after setting them.
|
||||
*/
|
||||
if ((last->inp_socket->so_options &
|
||||
(SO_REUSEPORT|SO_REUSEADDR)) == 0)
|
||||
(SO_REUSEPORT|SO_REUSEPORT_LB|SO_REUSEADDR)) == 0)
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -147,6 +147,9 @@ typedef __uintptr_t uintptr_t;
|
||||
#define SO_NO_OFFLOAD 0x4000 /* socket cannot be offloaded */
|
||||
#define SO_NO_DDP 0x8000 /* disable direct data placement */
|
||||
|
||||
/* XXX: so_options was only 16 bits wide; it has been widened to 32 bits everywhere. */
|
||||
#define SO_REUSEPORT_LB 0x00010000 /* reuse with load balancing */
|
||||
|
||||
/*
|
||||
* Additional options, not kept in so_options.
|
||||
*/
|
||||
|
@ -84,7 +84,7 @@ struct socket {
|
||||
struct selinfo so_rdsel; /* (b/cr) for so_rcv/so_comp */
|
||||
struct selinfo so_wrsel; /* (b/cs) for so_snd */
|
||||
short so_type; /* (a) generic type, see socket.h */
|
||||
short so_options; /* (b) from socket call, see socket.h */
|
||||
int so_options; /* (b) from socket call, see socket.h */
|
||||
short so_linger; /* time to linger close(2) */
|
||||
short so_state; /* (b) internal state flags SS_* */
|
||||
void *so_pcb; /* protocol control block */
|
||||
@ -399,6 +399,7 @@ int socreate(int dom, struct socket **aso, int type, int proto,
|
||||
int sodisconnect(struct socket *so);
|
||||
struct sockaddr *sodupsockaddr(const struct sockaddr *sa, int mflags);
|
||||
void sofree(struct socket *so);
|
||||
void soinherit(struct socket *so, struct socket *so_inh);
|
||||
void sohasoutofband(struct socket *so);
|
||||
int solisten(struct socket *so, int backlog, struct thread *td);
|
||||
void solisten_proto(struct socket *so, int backlog);
|
||||
|
Loading…
x
Reference in New Issue
Block a user