diff --git a/cddl/lib/libdtrace/tcp.d b/cddl/lib/libdtrace/tcp.d index 383d284821b5..b3e1764ff1a5 100644 --- a/cddl/lib/libdtrace/tcp.d +++ b/cddl/lib/libdtrace/tcp.d @@ -192,12 +192,12 @@ translator tcpsinfo_t < struct tcpcb *p > { tcps_rport = p == NULL ? 0 : ntohs(p->t_inpcb->inp_inc.inc_ie.ie_fport); tcps_laddr = p == NULL ? 0 : p->t_inpcb->inp_vflag == INP_IPV4 ? - inet_ntoa(&p->t_inpcb->inp_inc.inc_ie.ie_dependladdr.id46_addr.ia46_addr4.s_addr) : - inet_ntoa6(&p->t_inpcb->inp_inc.inc_ie.ie_dependladdr.id6_addr); + inet_ntoa(&p->t_inpcb->inp_inc.inc_ie.ie_dependladdr.ie46_local.ia46_addr4.s_addr) : + inet_ntoa6(&p->t_inpcb->inp_inc.inc_ie.ie_dependladdr.ie6_local); tcps_raddr = p == NULL ? 0 : p->t_inpcb->inp_vflag == INP_IPV4 ? - inet_ntoa(&p->t_inpcb->inp_inc.inc_ie.ie_dependfaddr.id46_addr.ia46_addr4.s_addr) : - inet_ntoa6(&p->t_inpcb->inp_inc.inc_ie.ie_dependfaddr.id6_addr); + inet_ntoa(&p->t_inpcb->inp_inc.inc_ie.ie_dependfaddr.ie46_foreign.ia46_addr4.s_addr) : + inet_ntoa6(&p->t_inpcb->inp_inc.inc_ie.ie_dependfaddr.ie6_foreign); tcps_state = p == NULL ? -1 : p->t_state; tcps_iss = p == NULL ? 0 : p->iss; tcps_irs = p == NULL ? 0 : p->irs; diff --git a/sys/kern/uipc_debug.c b/sys/kern/uipc_debug.c index 76620477fab5..7a4cf1cdb028 100644 --- a/sys/kern/uipc_debug.c +++ b/sys/kern/uipc_debug.c @@ -77,7 +77,7 @@ db_print_sotype(short so_type) } static void -db_print_sooptions(int so_options) +db_print_sooptions(short so_options) { int comma; @@ -122,10 +122,6 @@ db_print_sooptions(int so_options) db_printf("%sSO_REUSEPORT", comma ? ", " : ""); comma = 1; } - if (so_options & SO_REUSEPORT_LB) { - db_printf("%sSO_REUSEPORT_LB", comma ? ", " : ""); - comma = 1; - } if (so_options & SO_TIMESTAMP) { db_printf("%sSO_TIMESTAMP", comma ? ", " : ""); comma = 1; diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 71e8bc306e83..416de462c5b5 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -1056,100 +1056,6 @@ sofree(struct socket *so) sodealloc(so); } -/* - * Let socket in same load balance group (same port and address) - * inherit pending sockets of the closing socket. - * - * "so_inh" will inherit sockets from "so" - */ -void -soinherit(struct socket *so, struct socket *so_inh) -{ - TAILQ_HEAD(, socket) comp, incomp; - struct socket *sp, *head, *head_inh; - int qlen, incqlen; - - KASSERT(so->so_options & SO_ACCEPTCONN, - ("so does not accept connection")); - KASSERT(so_inh->so_options & SO_ACCEPTCONN, - ("so_inh does not accept connection")); - - -restart: - SOCK_LOCK(so); - if ((head = so->so_listen) != NULL && - __predict_false(SOLISTEN_TRYLOCK(head) == 0)) { - SOCK_UNLOCK(so); - goto restart; - } - -restart_inh: - SOCK_LOCK(so_inh); - if ((head_inh = so_inh->so_listen) != NULL && - __predict_false(SOLISTEN_TRYLOCK(head_inh) == 0)) { - SOCK_UNLOCK(so_inh); - goto restart_inh; - } - - TAILQ_INIT(&comp); - TAILQ_INIT(&incomp); - - /* - * Save completed queue and incompleted queue - */ - TAILQ_CONCAT(&comp, &so->sol_comp, so_list); - qlen = so->sol_qlen; - so->sol_qlen = 0; - - TAILQ_CONCAT(&incomp, &so->sol_incomp, so_list); - incqlen = so->sol_incqlen; - so->sol_incqlen = 0; - - /* - * Append the saved completed queue and incompleted - * queue to the socket inherits them. - * - * XXX - * This may temporarily break the inheriting socket's - * so_qlimit. - */ - TAILQ_FOREACH(sp, &comp, so_list) { - refcount_acquire(&so_inh->so_count); - sp->so_listen = so_inh; - crfree(sp->so_cred); - sp->so_cred = crhold(so_inh->so_cred); - } - - TAILQ_FOREACH(sp, &incomp, so_list) { - refcount_acquire(&so_inh->so_count); - sp->so_listen = so_inh; - crfree(sp->so_cred); - sp->so_cred = crhold(so_inh->so_cred); - } - - TAILQ_CONCAT(&so_inh->sol_comp, &comp, so_list); - so_inh->sol_qlen += qlen; - - TAILQ_CONCAT(&so_inh->sol_incomp, &incomp, so_list); - so_inh->sol_incqlen += incqlen; - - SOCK_UNLOCK(so); - if(head != NULL) - SOLISTEN_UNLOCK(head); - - SOCK_UNLOCK(so_inh); - if(head_inh != NULL) { - if(qlen > 0) { - /* - * "New" connections have arrived - */ - solisten_wakeup(head_inh); - } else { - SOLISTEN_UNLOCK(head_inh); - } - } -} - /* * Close a socket on last file table reference removal. Initiate disconnect * if connected. Free socket when disconnect complete. @@ -2870,7 +2776,6 @@ sosetopt(struct socket *so, struct sockopt *sopt) case SO_BROADCAST: case SO_REUSEADDR: case SO_REUSEPORT: - case SO_REUSEPORT_LB: case SO_OOBINLINE: case SO_TIMESTAMP: case SO_BINTIME: @@ -3089,7 +2994,6 @@ sogetopt(struct socket *so, struct sockopt *sopt) case SO_KEEPALIVE: case SO_REUSEADDR: case SO_REUSEPORT: - case SO_REUSEPORT_LB: case SO_BROADCAST: case SO_OOBINLINE: case SO_ACCEPTCONN: diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index 8706cf6a13e6..19eb5af9596d 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -108,9 +108,6 @@ __FBSDID("$FreeBSD$"); #include -#define INPCBLBGROUP_SIZMIN 8 -#define INPCBLBGROUP_SIZMAX 256 - static struct callout ipport_tick_callout; /* @@ -220,185 +217,6 @@ SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomtime, * functions often modify hash chains or addresses in pcbs. */ -static struct inpcblbgroup * -in_pcblbgroup_alloc(struct inpcblbgrouphead *hdr, u_char vflag, - uint16_t port, const union in_dependaddr *addr, int size) -{ - struct inpcblbgroup *grp; - - size_t bytes = __offsetof(struct inpcblbgroup, il_inp[size]); - grp = malloc(bytes, M_PCB, M_ZERO | M_NOWAIT); - if(!grp) - return NULL; - grp->il_vflag = vflag; - grp->il_lport = port; - grp->il_dependladdr = *addr; - grp->il_inpsiz = size; - LIST_INSERT_HEAD(hdr, grp, il_list); - - return grp; -} - -static void -in_pcblbgroup_free(struct inpcblbgroup *grp) -{ - LIST_REMOVE(grp, il_list); - free(grp, M_TEMP); -} - -static struct inpcblbgroup * -in_pcblbgroup_resize(struct inpcblbgrouphead *hdr, - struct inpcblbgroup *old_grp, int size) -{ - struct inpcblbgroup *grp; - int i; - - grp = in_pcblbgroup_alloc(hdr, old_grp->il_vflag, - old_grp->il_lport, &old_grp->il_dependladdr, size); - if(!grp) - return NULL; - - KASSERT(old_grp->il_inpcnt < grp->il_inpsiz, - ("invalid new local group size %d and old local group count %d", - grp->il_inpsiz, old_grp->il_inpcnt)); - for (i = 0; i < old_grp->il_inpcnt; ++i) - grp->il_inp[i] = old_grp->il_inp[i]; - grp->il_inpcnt = old_grp->il_inpcnt; - - in_pcblbgroup_free(old_grp); - - return grp; -} - -/* - * Add PCB to lb group (load balance used by SO_REUSEPORT_LB) - */ -static int -in_pcbinslbgrouphash(struct inpcb *inp, struct inpcbinfo *pcbinfo) -{ - struct inpcblbgrouphead *hdr; - struct inpcblbgroup *grp; - - uint16_t hashmask = pcbinfo->ipi_lbgrouphashmask; - uint16_t lport = inp->inp_lport; - uint32_t group_index = INP_PCBLBGROUP_PORTHASH(lport, hashmask); - - hdr = &pcbinfo->ipi_lbgrouphashbase[group_index]; - - struct ucred *cred; - - if (pcbinfo->ipi_lbgrouphashbase == NULL) - return 0; - - /* - * don't allow jailed socket to join local group - */ - if (inp->inp_socket != NULL) - cred = inp->inp_socket->so_cred; - else - cred = NULL; - if (cred != NULL && jailed(cred)) - return 0; - -#ifdef INET6 - /* - * don't allow IPv4 mapped INET6 wild socket - */ - if ((inp->inp_vflag & INP_IPV4) && - inp->inp_laddr.s_addr == INADDR_ANY && - INP_CHECK_SOCKAF(inp->inp_socket, AF_INET6)) { - return 0; - } -#endif - - hdr = &pcbinfo->ipi_lbgrouphashbase[ - INP_PCBLBGROUP_PORTHASH(inp->inp_lport, pcbinfo->ipi_lbgrouphashmask)]; - - LIST_FOREACH(grp, hdr, il_list) { - if (grp->il_vflag == inp->inp_vflag && - grp->il_lport == inp->inp_lport && - memcmp(&grp->il_dependladdr, - &inp->inp_inc.inc_ie.ie_dependladdr, - sizeof(grp->il_dependladdr)) == 0) { - break; - } - } - if (grp == NULL) { - /* Create new load balance group */ - grp = in_pcblbgroup_alloc(hdr, inp->inp_vflag, - inp->inp_lport, &inp->inp_inc.inc_ie.ie_dependladdr, - INPCBLBGROUP_SIZMIN); - if(!grp) - return (ENOBUFS); - } else if (grp->il_inpcnt == grp->il_inpsiz) { - if (grp->il_inpsiz >= INPCBLBGROUP_SIZMAX) { - static int limit_logged = 0; - - if (!limit_logged) { - limit_logged = 1; - printf("lb group port %d, " - "limit reached\n", ntohs(grp->il_lport)); - } - return 0; - } - - /* Expand this local group */ - grp = in_pcblbgroup_resize(hdr, grp, grp->il_inpsiz * 2); - if(!grp) - return (ENOBUFS); - } - - KASSERT(grp->il_inpcnt < grp->il_inpsiz, - ("invalid local group size %d and count %d", - grp->il_inpsiz, grp->il_inpcnt)); - - grp->il_inp[grp->il_inpcnt] = inp; - grp->il_inpcnt++; - return 0; -} - -static void -in_pcbremlbgrouphash(struct inpcb *inp, struct inpcbinfo *pcbinfo) -{ - struct inpcblbgrouphead *hdr; - struct inpcblbgroup *grp; - - if (pcbinfo->ipi_lbgrouphashbase == NULL) - return; - - hdr = &pcbinfo->ipi_lbgrouphashbase[ - INP_PCBLBGROUP_PORTHASH(inp->inp_lport, pcbinfo->ipi_lbgrouphashmask)]; - - LIST_FOREACH(grp, hdr, il_list) { - int i; - - for (i = 0; i < grp->il_inpcnt; ++i) { - if (grp->il_inp[i] != inp) - continue; - - if (grp->il_inpcnt == 1) { - /* Free this local group */ - in_pcblbgroup_free(grp); - } else { - /* Pull up inpcbs */ - for (; i + 1 < grp->il_inpcnt; ++i) - grp->il_inp[i] = grp->il_inp[i + 1]; - grp->il_inpcnt--; - - if (grp->il_inpsiz > INPCBLBGROUP_SIZMIN && - grp->il_inpcnt <= (grp->il_inpsiz / 4)) { - /* Shrink this local group */ - struct inpcblbgroup *new_grp = - in_pcblbgroup_resize(hdr, grp, grp->il_inpsiz / 2); - if(new_grp) - grp = new_grp; - } - } - return; - } - } -} - /* * Different protocols initialize their inpcbs differently - giving * different name to the lock. But they all are disposed the same. @@ -434,8 +252,6 @@ in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name, &pcbinfo->ipi_hashmask); pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB, &pcbinfo->ipi_porthashmask); - pcbinfo->ipi_lbgrouphashbase = hashinit(hash_nelements, M_PCB, - &pcbinfo->ipi_lbgrouphashmask); #ifdef PCBGROUP in_pcbgroup_init(pcbinfo, hashfields, hash_nelements); #endif @@ -459,8 +275,6 @@ in_pcbinfo_destroy(struct inpcbinfo *pcbinfo) hashdestroy(pcbinfo->ipi_hashbase, M_PCB, pcbinfo->ipi_hashmask); hashdestroy(pcbinfo->ipi_porthashbase, M_PCB, pcbinfo->ipi_porthashmask); - hashdestroy(pcbinfo->ipi_lbgrouphashbase, M_PCB, - pcbinfo->ipi_lbgrouphashmask); #ifdef PCBGROUP in_pcbgroup_destroy(pcbinfo); #endif @@ -699,20 +513,18 @@ in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp, /* * Return cached socket options. */ -int +short inp_so_options(const struct inpcb *inp) { - int so_options; + short so_options; - so_options = 0; + so_options = 0; - if ((inp->inp_flags2 & INP_REUSEPORT_LB) != 0) - so_options |= SO_REUSEPORT_LB; - if ((inp->inp_flags2 & INP_REUSEPORT) != 0) - so_options |= SO_REUSEPORT; - if ((inp->inp_flags2 & INP_REUSEADDR) != 0) - so_options |= SO_REUSEADDR; - return (so_options); + if ((inp->inp_flags2 & INP_REUSEPORT) != 0) + so_options |= SO_REUSEPORT; + if ((inp->inp_flags2 & INP_REUSEADDR) != 0) + so_options |= SO_REUSEADDR; + return (so_options); } #endif /* INET || INET6 */ @@ -768,12 +580,6 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp, int lookupflags = 0, reuseport = (so->so_options & SO_REUSEPORT); int error; - /* - * XXX Maybe we could let SO_REUSEPORT_LB set SO_REUSEPORT bit here - * so that we don't have to add to the (already messy) code below - */ - int reuseport_lb = (so->so_options & SO_REUSEPORT_LB); - /* * No state changes, so read locks are sufficient here. */ @@ -785,7 +591,7 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp, laddr.s_addr = *laddrp; if (nam != NULL && laddr.s_addr != INADDR_ANY) return (EINVAL); - if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0) + if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) lookupflags = INPLOOKUP_WILDCARD; if (nam == NULL) { if ((error = prison_local_ip4(cred, &laddr)) != 0) @@ -822,20 +628,16 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp, */ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0) reuseport = SO_REUSEADDR|SO_REUSEPORT; - // XXX: How to deal with SO_REUSEPORT_LB here? - // Added equivalent treatment as SO_REUSEPORT here for now - if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT_LB)) != 0) - reuseport_lb = SO_REUSEADDR|SO_REUSEPORT_LB; } else if (sin->sin_addr.s_addr != INADDR_ANY) { sin->sin_port = 0; /* yech... */ bzero(&sin->sin_zero, sizeof(sin->sin_zero)); /* - * Is the address a local IP address? + * Is the address a local IP address? * If INP_BINDANY is set, then the socket may be bound * to any endpoint address, local or not. */ if ((inp->inp_flags & INP_BINDANY) == 0 && - ifa_ifwithaddr_check((struct sockaddr *)sin) == 0) + ifa_ifwithaddr_check((struct sockaddr *)sin) == 0) return (EADDRNOTAVAIL); } laddr = sin->sin_addr; @@ -865,8 +667,7 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp, ntohl(t->inp_faddr.s_addr) == INADDR_ANY) && (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || ntohl(t->inp_laddr.s_addr) != INADDR_ANY || - (t->inp_flags2 & INP_REUSEPORT) || - (t->inp_flags2 & INP_REUSEPORT_LB) == 0) && + (t->inp_flags2 & INP_REUSEPORT) == 0) && (inp->inp_cred->cr_uid != t->inp_cred->cr_uid)) return (EADDRINUSE); @@ -891,14 +692,11 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp, */ tw = intotw(t); if (tw == NULL || - ((reuseport & tw->tw_so_options) == 0 && - (reuseport_lb & tw->tw_so_options) == 0)) { + (reuseport & tw->tw_so_options) == 0) return (EADDRINUSE); - } } else if (t && - ((inp->inp_flags2 & INP_BINDMULTI) == 0) && - (reuseport & inp_so_options(t)) == 0 && - (reuseport_lb & inp_so_options(t)) == 0) { + ((inp->inp_flags2 & INP_BINDMULTI) == 0) && + (reuseport & inp_so_options(t)) == 0) { #ifdef INET6 if (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || @@ -907,7 +705,7 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp, (inp->inp_vflag & INP_IPV6PROTO) == 0 || (t->inp_vflag & INP_IPV6PROTO) == 0) #endif - return (EADDRINUSE); + return (EADDRINUSE); if (t && (! in_pcbbind_check_bindmulti(inp, t))) return (EADDRINUSE); } @@ -1611,7 +1409,6 @@ in_pcbdrop(struct inpcb *inp) struct inpcbport *phd = inp->inp_phd; INP_HASH_WLOCK(inp->inp_pcbinfo); - in_pcbremlbgrouphash(inp, inp->inp_pcbinfo); LIST_REMOVE(inp, inp_hash); LIST_REMOVE(inp, inp_portlist); if (LIST_FIRST(&phd->phd_pcblist) == NULL) { @@ -1872,98 +1669,6 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, } #undef INP_LOOKUP_MAPPED_PCB_COST -struct inpcb * -in_pcblookup_lbgroup_last(const struct inpcb *inp) -{ - const struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; - const struct inpcblbgrouphead *hdr; - const struct inpcblbgroup *grp; - int i; - - if (pcbinfo->ipi_lbgrouphashbase == NULL) - return NULL; - - hdr = &pcbinfo->ipi_lbgrouphashbase[ - INP_PCBLBGROUP_PORTHASH(inp->inp_lport, pcbinfo->ipi_lbgrouphashmask)]; - - LIST_FOREACH(grp, hdr, il_list) { - if (grp->il_vflag == inp->inp_vflag && - grp->il_lport == inp->inp_lport && - memcmp(&grp->il_dependladdr, - &inp->inp_inc.inc_ie.ie_dependladdr, - sizeof(grp->il_dependladdr)) == 0) { - break; - } - } - if (grp == NULL || grp->il_inpcnt == 1) - return NULL; - - KASSERT(grp->il_inpcnt >= 2, - ("invalid lbgroup inp count %d", grp->il_inpcnt)); - for (i = 0; i < grp->il_inpcnt; ++i) { - if (grp->il_inp[i] == inp) { - int last = grp->il_inpcnt - 1; - - if (i == last) - last = grp->il_inpcnt - 2; - return grp->il_inp[last]; - } - } - return NULL; -} - -static struct inpcb * -in_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo, - const struct in_addr *laddr, uint16_t lport, const struct in_addr *faddr, - uint16_t fport, int lookupflags) -{ - struct inpcb *local_wild = NULL; - const struct inpcblbgrouphead *hdr; - struct inpcblbgroup *grp; - struct inpcblbgroup *grp_local_wild; - - hdr = &pcbinfo->ipi_lbgrouphashbase[ - INP_PCBLBGROUP_PORTHASH(lport, pcbinfo->ipi_lbgrouphashmask)]; - - /* - * Order of socket selection: - * 1. non-wild. - * 2. wild (if lookupflags contains INPLOOKUP_WILDCARD). - * - * NOTE: - * - Load balanced group does not contain jailed sockets - * - Load balanced group does not contain IPv4 mapped INET6 wild sockets - */ - LIST_FOREACH(grp, hdr, il_list) { -#ifdef INET6 - if (!(grp->il_vflag & INP_IPV4)) - continue; -#endif - - if (grp->il_lport == lport) { - - uint32_t idx = 0; - int pkt_hash = INP_PCBLBGROUP_PKTHASH(faddr->s_addr, lport, fport); - - idx = pkt_hash % grp->il_inpcnt; - - if (grp->il_laddr.s_addr == laddr->s_addr) { - return grp->il_inp[idx]; - } else { - if (grp->il_laddr.s_addr == INADDR_ANY && - (lookupflags & INPLOOKUP_WILDCARD)) { - local_wild = grp->il_inp[idx]; - grp_local_wild = grp; - } - } - } - } - if (local_wild != NULL) { - return local_wild; - } - return NULL; -} - #ifdef PCBGROUP /* * Lookup PCB in hash list, using pcbgroup tables. @@ -2242,18 +1947,6 @@ in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr, if (tmpinp != NULL) return (tmpinp); - /* - * Then look in lb group (for wildcard match) - */ - if (pcbinfo->ipi_lbgrouphashbase != NULL && - (lookupflags & INPLOOKUP_WILDCARD)) { - inp = in_pcblookup_lbgroup(pcbinfo, &laddr, lport, &faddr, fport, - lookupflags); - if (inp != NULL) { - return inp; - } - } - /* * Then look for a wildcard match, if requested. */ @@ -2471,7 +2164,6 @@ in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update) struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct inpcbport *phd; u_int32_t hashkey_faddr; - int so_options; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); @@ -2492,20 +2184,6 @@ in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update) pcbporthash = &pcbinfo->ipi_porthashbase[ INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)]; - - /* - * Add entry in lb group - * Only do this if SO_REUSEPORT_LB is set - */ - so_options = inp_so_options(inp); - if(so_options & SO_REUSEPORT_LB) { - int ret = in_pcbinslbgrouphash(inp, pcbinfo); - if(ret) { - // pcb lb group malloc fail (ret=ENOBUFS) - return ret; - } - } - /* * Go through port list and look for a head for this lport. */ @@ -2632,10 +2310,6 @@ in_pcbremlists(struct inpcb *inp) struct inpcbport *phd = inp->inp_phd; INP_HASH_WLOCK(pcbinfo); - - // XXX Only do if SO_REUSEPORT_LB set? - in_pcbremlbgrouphash(inp, pcbinfo); - LIST_REMOVE(inp, inp_hash); LIST_REMOVE(inp, inp_portlist); if (LIST_FIRST(&phd->phd_pcblist) == NULL) { diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index ac2d53a88a98..f4b6da20a2db 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -78,11 +78,6 @@ struct in_addr_4in6 { struct in_addr ia46_addr4; }; -union in_dependaddr { - struct in_addr_4in6 id46_addr; - struct in6_addr id6_addr; -}; - /* * NOTE: ipv6 addrs should be 64-bit aligned, per RFC 2553. in_conninfo has * some extra padding to accomplish this. @@ -93,14 +88,22 @@ struct in_endpoints { u_int16_t ie_fport; /* foreign port */ u_int16_t ie_lport; /* local port */ /* protocol dependent part, local and foreign addr */ - union in_dependaddr ie_dependfaddr; /* foreign host table entry */ - union in_dependaddr ie_dependladdr; /* local host table entry */ -#define ie_faddr ie_dependfaddr.id46_addr.ia46_addr4 -#define ie_laddr ie_dependladdr.id46_addr.ia46_addr4 -#define ie6_faddr ie_dependfaddr.id6_addr -#define ie6_laddr ie_dependladdr.id6_addr + union { + /* foreign host table entry */ + struct in_addr_4in6 ie46_foreign; + struct in6_addr ie6_foreign; + } ie_dependfaddr; + union { + /* local host table entry */ + struct in_addr_4in6 ie46_local; + struct in6_addr ie6_local; + } ie_dependladdr; u_int32_t ie6_zoneid; /* scope zone id */ }; +#define ie_faddr ie_dependfaddr.ie46_foreign.ia46_addr4 +#define ie_laddr ie_dependladdr.ie46_local.ia46_addr4 +#define ie6_faddr ie_dependfaddr.ie6_foreign +#define ie6_laddr ie_dependladdr.ie6_local /* * XXX The defines for inc_* are hacks and should be changed to direct @@ -404,21 +407,6 @@ struct inpcbport { u_short phd_port; }; -struct inpcblbgroup { - LIST_ENTRY(inpcblbgroup) il_list; - uint16_t il_lport; - u_char il_vflag; - u_char il_pad; - uint32_t il_pad2; - union in_dependaddr il_dependladdr; -#define il_laddr il_dependladdr.id46_addr.ia46_addr4 -#define il6_laddr il_dependladdr.id6_addr - uint32_t il_inpsiz; /* size of il_inp[] */ - uint32_t il_inpcnt; /* # of elem in il_inp[] */ - struct inpcb *il_inp[]; -}; -LIST_HEAD(inpcblbgrouphead, inpcblbgroup); - /*- * Global data structure for each high-level protocol (UDP, TCP, ...) in both * IPv4 and IPv6. Holds inpcb lists and information for managing them. @@ -511,13 +499,6 @@ struct inpcbinfo { struct inpcbhead *ipi_wildbase; /* (p) */ u_long ipi_wildmask; /* (p) */ - /* - * Load balanced group used by the SO_REUSEPORT_LB option, - * hashed by local address and local port. - */ - struct inpcblbgrouphead *ipi_lbgrouphashbase; - u_long ipi_lbgrouphashmask; - /* * Pointer to network stack instance */ @@ -604,7 +585,7 @@ struct tcpcb * inp_inpcbtotcpcb(struct inpcb *inp); void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, uint32_t *faddr, uint16_t *fp); -int inp_so_options(const struct inpcb *inp); +short inp_so_options(const struct inpcb *inp); #endif /* _KERNEL */ @@ -667,10 +648,6 @@ int inp_so_options(const struct inpcb *inp); (((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask)) #define INP_PCBPORTHASH(lport, mask) \ (ntohs((lport)) & (mask)) -#define INP_PCBLBGROUP_PORTHASH(lport, mask) \ - (ntohs((lport)) & (mask)) -#define INP_PCBLBGROUP_PKTHASH(faddr, lport, fport) \ - ((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) #define INP6_PCBHASHKEY(faddr) ((faddr)->s6_addr32[3]) /* @@ -739,7 +716,6 @@ int inp_so_options(const struct inpcb *inp); #define INP_RATE_LIMIT_CHANGED 0x00000400 /* rate limit needs attention */ #define INP_ORIGDSTADDR 0x00000800 /* receive IP dst address/port */ #define INP_CANNOT_DO_ECN 0x00001000 /* The stack does not do ECN */ -#define INP_REUSEPORT_LB 0x00002000 /* SO_REUSEPORT_LB option is set */ /* * Flags passed to in_pcblookup*() functions. @@ -842,8 +818,6 @@ struct inpcb * struct inpcb * in_pcblookup(struct inpcbinfo *, struct in_addr, u_int, struct in_addr, u_int, int, struct ifnet *); -struct inpcb * - in_pcblookup_lbgroup_last(const struct inpcb *inp); struct inpcb * in_pcblookup_mbuf(struct inpcbinfo *, struct in_addr, u_int, struct in_addr, u_int, int, struct ifnet *, struct mbuf *); diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 047a0646429b..1a594cd1b680 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -986,15 +986,6 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) INP_WUNLOCK(inp); error = 0; break; - case SO_REUSEPORT_LB: - INP_WLOCK(inp); - if ((so->so_options & SO_REUSEPORT_LB) != 0) - inp->inp_flags2 |= INP_REUSEPORT_LB; - else - inp->inp_flags2 &= ~INP_REUSEPORT_LB; - INP_WUNLOCK(inp); - error = 0; - break; case SO_SETFIB: INP_WLOCK(inp); inp->inp_inc.inc_fibnum = so->so_fibnum; diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index de0d52900515..5397b9f7bc44 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -1956,28 +1956,10 @@ tcp_close(struct tcpcb *tp) { struct inpcb *inp = tp->t_inpcb; struct socket *so; - struct inpcb *inp_inh = NULL; - int listen = tp->t_state & TCPS_LISTEN; INP_INFO_LOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); - if (listen) { - /* - * Pending socket/syncache inheritance - * - * If this is a listen(2) socket, find another listen(2) - * socket in the same local group, which could inherit - * the syncache and sockets pending on the completion - * and incompletion queues. - * - * NOTE: - * Currently the inheritance could only happen on the - * listen(2) sockets with SO_REUSEPORT_LB set. - */ - inp_inh = in_pcblookup_lbgroup_last(inp); - } - #ifdef TCP_OFFLOAD if (tp->t_state == TCPS_LISTEN) tcp_offload_listen_stop(tp); @@ -1997,16 +1979,7 @@ tcp_close(struct tcpcb *tp) tcp_state_change(tp, TCPS_CLOSED); KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL")); so = inp->inp_socket; - soisdisconnected(so); - - if(listen) - { - if(inp_inh != NULL && inp_inh->inp_socket != NULL) { - soinherit(so, inp_inh->inp_socket); - } - } - if (inp->inp_flags & INP_SOCKREF) { KASSERT(so->so_state & SS_PROTOREF, ("tcp_close: !SS_PROTOREF")); diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index bcafefc43997..670182ece8b8 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -612,7 +612,7 @@ udp_input(struct mbuf **mp, int *offp, int proto) * will never clear these options after setting them. */ if ((last->inp_socket->so_options & - (SO_REUSEPORT|SO_REUSEPORT_LB|SO_REUSEADDR)) == 0) + (SO_REUSEPORT|SO_REUSEADDR)) == 0) break; } diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c index 1e3425a094b7..2d560a10e63f 100644 --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -125,12 +125,6 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, int error, lookupflags = 0; int reuseport = (so->so_options & SO_REUSEPORT); - /* - * XXX Maybe we could let SO_REUSEPORT_LB set SO_REUSEPORT bit here - * so that we don't have to add to the (already messy) code below - */ - int reuseport_lb = (so->so_options & SO_REUSEPORT_LB); - INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); @@ -138,7 +132,7 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, return (EADDRNOTAVAIL); if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) return (EINVAL); - if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0) + if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) lookupflags = INPLOOKUP_WILDCARD; if (nam == NULL) { if ((error = prison_local_ip6(cred, &inp->in6p_laddr, @@ -172,10 +166,6 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, */ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0) reuseport = SO_REUSEADDR|SO_REUSEPORT; - // XXX: How to deal with SO_REUSEPORT_LB here? - // Added equivalent treatment as SO_REUSEPORT here for now - if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT_LB)) != 0) - reuseport_lb = SO_REUSEADDR|SO_REUSEPORT_LB; } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct ifaddr *ifa; @@ -224,8 +214,7 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) && (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) || - (t->inp_flags2 & INP_REUSEPORT) || - (t->inp_flags2 & INP_REUSEPORT_LB) == 0) && + (t->inp_flags2 & INP_REUSEPORT) == 0) && (inp->inp_cred->cr_uid != t->inp_cred->cr_uid)) return (EADDRINUSE); @@ -275,39 +264,34 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, */ tw = intotw(t); if (tw == NULL || - ((reuseport & tw->tw_so_options) == 0 && - (reuseport_lb & tw->tw_so_options) == 0)) + (reuseport & tw->tw_so_options) == 0) return (EADDRINUSE); - } else if (t && (reuseport & inp_so_options(t)) == 0 && - (reuseport_lb & inp_so_options(t)) == 0) { + } else if (t && (reuseport & inp_so_options(t)) == 0) { return (EADDRINUSE); } #ifdef INET if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && - IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { + IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6); t = in_pcblookup_local(pcbinfo, sin.sin_addr, - lport, lookupflags, cred); + lport, lookupflags, cred); if (t && t->inp_flags & INP_TIMEWAIT) { tw = intotw(t); if (tw == NULL) return (EADDRINUSE); if ((reuseport & tw->tw_so_options) == 0 - && (reuseport_lb & tw->tw_so_options) == 0 - && (ntohl(t->inp_laddr.s_addr) != - INADDR_ANY || ((inp->inp_vflag & - INP_IPV6PROTO) == - (t->inp_vflag & INP_IPV6PROTO)))) + && (ntohl(t->inp_laddr.s_addr) != + INADDR_ANY || ((inp->inp_vflag & + INP_IPV6PROTO) == + (t->inp_vflag & INP_IPV6PROTO)))) return (EADDRINUSE); } else if (t && - (reuseport & inp_so_options(t)) == 0 && - (reuseport_lb & inp_so_options(t)) == 0 && - (ntohl(t->inp_laddr.s_addr) != INADDR_ANY || - (t->inp_vflag & INP_IPV6PROTO) != 0)) { + (reuseport & inp_so_options(t)) == 0 && + (ntohl(t->inp_laddr.s_addr) != INADDR_ANY || + (t->inp_vflag & INP_IPV6PROTO) != 0)) return (EADDRINUSE); - } } #endif } @@ -872,54 +856,6 @@ in6_rtchange(struct inpcb *inp, int errno) return inp; } -static struct inpcb * -in6_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo, - const struct in6_addr *laddr, uint16_t lport, const struct in6_addr *faddr, - uint16_t fport, int lookupflags) -{ - struct inpcb *local_wild = NULL; - const struct inpcblbgrouphead *hdr; - struct inpcblbgroup *grp; - struct inpcblbgroup *grp_local_wild; - - hdr = &pcbinfo->ipi_lbgrouphashbase[ - INP_PCBLBGROUP_PORTHASH(lport, pcbinfo->ipi_lbgrouphashmask)]; - - /* - * Order of socket selection: - * 1. non-wild. - * 2. wild (if lookupflags contains INPLOOKUP_WILDCARD). - * - * NOTE: - * - Load balanced group does not contain jailed sockets - * - Load balanced does not contain IPv4 mapped INET6 wild sockets - */ - LIST_FOREACH(grp, hdr, il_list) { - - if (grp->il_lport == lport) { - uint32_t idx = 0; - int pkt_hash = INP_PCBLBGROUP_PKTHASH( - INP6_PCBHASHKEY(faddr), lport, fport); - - idx = pkt_hash % grp->il_inpcnt; - - if (IN6_ARE_ADDR_EQUAL(&grp->il6_laddr, laddr)) { - return grp->il_inp[idx]; - } else { - if (IN6_IS_ADDR_UNSPECIFIED(&grp->il6_laddr) && - (lookupflags & INPLOOKUP_WILDCARD)) { - local_wild = grp->il_inp[idx]; - grp_local_wild = grp; - } - } - } - } - if (local_wild != NULL) { - return local_wild; - } - return NULL; -} - #ifdef PCBGROUP /* * Lookup PCB in hash list, using pcbgroup tables. @@ -1121,8 +1057,6 @@ in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, } #endif /* PCBGROUP */ - - /* * Lookup PCB in hash list. */ @@ -1168,18 +1102,6 @@ in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, if (tmpinp != NULL) return (tmpinp); - /* - * Then look in lb group (for wildcard match) - */ - if (pcbinfo->ipi_lbgrouphashbase != NULL && - (lookupflags & INPLOOKUP_WILDCARD)) { - inp = in6_pcblookup_lbgroup(pcbinfo, laddr, lport, faddr, - fport, lookupflags); - if (inp != NULL) { - return inp; - } - } - /* * Then look for a wildcard match, if requested. */ diff --git a/sys/netinet6/in6_src.c b/sys/netinet6/in6_src.c index 9ccd15338234..7b3b5553c2e8 100644 --- a/sys/netinet6/in6_src.c +++ b/sys/netinet6/in6_src.c @@ -973,7 +973,7 @@ in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred) return(error); /* XXX: this is redundant when called from in6_pcbbind */ - if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0) + if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) lookupflags = INPLOOKUP_WILDCARD; inp->inp_flags |= INP_ANONPORT; diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index f854b916fcfe..7525b289279a 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -1454,15 +1454,6 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt) INP_WUNLOCK(in6p); error = 0; break; - case SO_REUSEPORT_LB: - INP_WLOCK(in6p); - if ((so->so_options & SO_REUSEPORT_LB) != 0) - in6p->inp_flags2 |= INP_REUSEPORT_LB; - else - in6p->inp_flags2 &= ~INP_REUSEPORT_LB; - INP_WUNLOCK(in6p); - error = 0; - break; case SO_SETFIB: INP_WLOCK(in6p); in6p->inp_inc.inc_fibnum = so->so_fibnum; diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c index 0169332606b0..fb4b63df37a7 100644 --- a/sys/netinet6/udp6_usrreq.c +++ b/sys/netinet6/udp6_usrreq.c @@ -399,7 +399,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto) * will never clear these options after setting them. */ if ((last->inp_socket->so_options & - (SO_REUSEPORT|SO_REUSEPORT_LB|SO_REUSEADDR)) == 0) + (SO_REUSEPORT|SO_REUSEADDR)) == 0) break; } diff --git a/sys/sys/socket.h b/sys/sys/socket.h index 1ee43e7c5a51..2e6e9fc5550a 100644 --- a/sys/sys/socket.h +++ b/sys/sys/socket.h @@ -147,9 +147,6 @@ typedef __uintptr_t uintptr_t; #define SO_NO_OFFLOAD 0x4000 /* socket cannot be offloaded */ #define SO_NO_DDP 0x8000 /* disable direct data placement */ -// XXX: so_options was only 16 bit, now globally increased to 32 bit -#define SO_REUSEPORT_LB 0x00010000 /* reuse with load balancing */ - /* * Additional options, not kept in so_options. */ diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index 946c342ea8d1..cf0b0deaeee6 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -84,7 +84,7 @@ struct socket { struct selinfo so_rdsel; /* (b/cr) for so_rcv/so_comp */ struct selinfo so_wrsel; /* (b/cs) for so_snd */ short so_type; /* (a) generic type, see socket.h */ - int so_options; /* (b) from socket call, see socket.h */ + short so_options; /* (b) from socket call, see socket.h */ short so_linger; /* time to linger close(2) */ short so_state; /* (b) internal state flags SS_* */ void *so_pcb; /* protocol control block */ @@ -399,7 +399,6 @@ int socreate(int dom, struct socket **aso, int type, int proto, int sodisconnect(struct socket *so); struct sockaddr *sodupsockaddr(const struct sockaddr *sa, int mflags); void sofree(struct socket *so); -void soinherit(struct socket *so, struct socket *so_inh); void sohasoutofband(struct socket *so); int solisten(struct socket *so, int backlog, struct thread *td); void solisten_proto(struct socket *so, int backlog);