Improved connection establishment performance by doing local port lookups via

a hashed port list. In the new scheme, in_pcblookup() goes away and is
replaced by a new routine, in_pcblookup_local() for doing the local port
check. Note that this implementation is space inefficient in that the PCB
struct is now too large to fit into 128 bytes. I might deal with this in the
future by using the new zone allocator, but I wanted these changes to be
extensively tested in their current form first.

Also:
1) Fixed off-by-one errors in the port lookup loops in in_pcbbind().
2) Got rid of some unneeded rehashing. Added a new routine, in_pcbinshash(),
   to do the initial hash insertion.
3) Renamed in_pcblookuphash() to in_pcblookup_hash() for easier readability.
4) Added a new routine, in_pcbremlists() to remove the PCB from the various
   hash lists.
5) Added/deleted comments where appropriate.
6) Removed unnecessary splnet() locking. In general, the PCB functions should
   be called at splnet()...there are unfortunately a few exceptions, however.
7) Reorganized a few structs for better cache line behavior.
8) Killed my TCP_ACK_HACK kludge. It may come back in a different form in
   the future, however.

These changes have been tested on wcarchive for more than a month. In tests
done here, connection establishment overhead is reduced by a factor of more
than 50, thus getting rid of one of the major networking scalability problems.

Still to do: make tcp_fasttimo/tcp_slowtimo scale well for systems with a
large number of connections. tcp_fasttimo is easy; tcp_slowtimo is difficult.

WARNING: Anything that knows about inpcb and tcpcb structs will have to be
         recompiled; at the very least, this includes netstat(1).
This commit is contained in:
David Greenman 1998-01-27 09:15:13 +00:00
parent 5be975adf0
commit c3229e05a3
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=32821
11 changed files with 311 additions and 244 deletions

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
* $Id: in_pcb.c,v 1.36 1997/12/23 01:40:40 alex Exp $
* $Id: in_pcb.c,v 1.37 1997/12/25 06:57:36 davidg Exp $
*/
#include <sys/param.h>
@ -55,8 +55,8 @@
struct in_addr zeroin_addr;
static void in_pcbinshash __P((struct inpcb *));
static void in_rtchange __P((struct inpcb *, int));
static void in_pcbremlists __P((struct inpcb *));
static void in_rtchange __P((struct inpcb *, int));
/*
* These configure the range of local port addresses assigned to
@ -106,6 +106,17 @@ SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW,
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW,
&ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
/*
* in_pcb.c: manage the Protocol Control Blocks.
*
* NOTE: It is assumed that most of these functions will be called at
* splnet(). XXX - There are, unfortunately, a few exceptions to this
* rule that should be fixed.
*/
/*
* Allocate a PCB and associate it with the socket.
*/
int
in_pcballoc(so, pcbinfo, p)
struct socket *so;
@ -113,7 +124,6 @@ in_pcballoc(so, pcbinfo, p)
struct proc *p;
{
register struct inpcb *inp;
int s;
MALLOC(inp, struct inpcb *, sizeof(*inp), M_PCB,
p ? M_WAITOK : M_NOWAIT);
@ -122,10 +132,7 @@ in_pcballoc(so, pcbinfo, p)
bzero((caddr_t)inp, sizeof(*inp));
inp->inp_pcbinfo = pcbinfo;
inp->inp_socket = so;
s = splnet();
LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list);
in_pcbinshash(inp);
splx(s);
so->so_pcb = (caddr_t)inp;
return (0);
}
@ -139,6 +146,7 @@ in_pcbbind(inp, nam, p)
register struct socket *so = inp->inp_socket;
unsigned short *lastport;
struct sockaddr_in *sin;
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
u_short lport = 0;
int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
int error;
@ -147,9 +155,7 @@ in_pcbbind(inp, nam, p)
return (EADDRNOTAVAIL);
if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY)
return (EINVAL);
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 &&
((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 ||
(so->so_options & SO_ACCEPTCONN) == 0))
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
wild = 1;
if (nam) {
sin = (struct sockaddr_in *)nam;
@ -186,8 +192,8 @@ in_pcbbind(inp, nam, p)
if (ntohs(lport) < IPPORT_RESERVED && p &&
suser(p->p_ucred, &p->p_acflag))
return (EACCES);
t = in_pcblookup(inp->inp_pcbinfo, zeroin_addr, 0,
sin->sin_addr, lport, wild);
t = in_pcblookup_local(pcbinfo, sin->sin_addr,
lport, wild);
if (t && (reuseport & t->inp_socket->so_options) == 0)
return (EADDRINUSE);
}
@ -202,17 +208,17 @@ in_pcbbind(inp, nam, p)
if (inp->inp_flags & INP_HIGHPORT) {
first = ipport_hifirstauto; /* sysctl */
last = ipport_hilastauto;
lastport = &inp->inp_pcbinfo->lasthi;
lastport = &pcbinfo->lasthi;
} else if (inp->inp_flags & INP_LOWPORT) {
if (p && (error = suser(p->p_ucred, &p->p_acflag)))
return error;
first = ipport_lowfirstauto; /* 1023 */
last = ipport_lowlastauto; /* 600 */
lastport = &inp->inp_pcbinfo->lastlow;
lastport = &pcbinfo->lastlow;
} else {
first = ipport_firstauto; /* sysctl */
last = ipport_lastauto;
lastport = &inp->inp_pcbinfo->lastport;
lastport = &pcbinfo->lastport;
}
/*
* Simple check to ensure all ports are not used up causing
@ -228,14 +234,20 @@ in_pcbbind(inp, nam, p)
count = first - last;
do {
if (count-- <= 0) /* completely used? */
return (EADDRNOTAVAIL);
if (count-- < 0) { /* completely used? */
/*
* Undo any address bind that may have
* occurred above.
*/
inp->inp_laddr.s_addr = INADDR_ANY;
return (EAGAIN);
}
--*lastport;
if (*lastport > first || *lastport < last)
*lastport = first;
lport = htons(*lastport);
} while (in_pcblookup(inp->inp_pcbinfo,
zeroin_addr, 0, inp->inp_laddr, lport, wild));
} while (in_pcblookup_local(pcbinfo,
inp->inp_laddr, lport, wild));
} else {
/*
* counting up
@ -243,18 +255,28 @@ in_pcbbind(inp, nam, p)
count = last - first;
do {
if (count-- <= 0) /* completely used? */
return (EADDRNOTAVAIL);
if (count-- < 0) { /* completely used? */
/*
* Undo any address bind that may have
* occurred above.
*/
inp->inp_laddr.s_addr = INADDR_ANY;
return (EAGAIN);
}
++*lastport;
if (*lastport < first || *lastport > last)
*lastport = first;
lport = htons(*lastport);
} while (in_pcblookup(inp->inp_pcbinfo,
zeroin_addr, 0, inp->inp_laddr, lport, wild));
} while (in_pcblookup_local(pcbinfo,
inp->inp_laddr, lport, wild));
}
}
inp->inp_lport = lport;
in_pcbrehash(inp);
if (in_pcbinshash(inp) != 0) {
inp->inp_laddr.s_addr = INADDR_ANY;
inp->inp_lport = 0;
return (EAGAIN);
}
return (0);
}
@ -403,10 +425,11 @@ in_pcbconnect(inp, nam, p)
if (error = in_pcbladdr(inp, nam, &ifaddr))
return(error);
if (in_pcblookuphash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
if (in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr,
inp->inp_lport, 0) != NULL)
inp->inp_lport, 0) != NULL) {
return (EADDRINUSE);
}
if (inp->inp_laddr.s_addr == INADDR_ANY) {
if (inp->inp_lport == 0)
(void)in_pcbbind(inp, (struct sockaddr *)0, p);
@ -435,8 +458,8 @@ in_pcbdetach(inp)
struct inpcb *inp;
{
struct socket *so = inp->inp_socket;
int s;
in_pcbremlists(inp);
so->so_pcb = 0;
sofree(so);
if (inp->inp_options)
@ -444,10 +467,6 @@ in_pcbdetach(inp)
if (inp->inp_route.ro_rt)
rtfree(inp->inp_route.ro_rt);
ip_freemoptions(inp->inp_moptions);
s = splnet();
LIST_REMOVE(inp, inp_hash);
LIST_REMOVE(inp, inp_list);
splx(s);
FREE(inp, M_PCB);
}
@ -470,6 +489,9 @@ in_setsockaddr(so, nam)
register struct inpcb *inp;
register struct sockaddr_in *sin;
/*
* Do the malloc first in case it blocks.
*/
MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK);
bzero(sin, sizeof *sin);
sin->sin_family = AF_INET;
@ -499,6 +521,9 @@ in_setpeeraddr(so, nam)
struct inpcb *inp;
register struct sockaddr_in *sin;
/*
* Do the malloc first in case it blocks.
*/
MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK);
bzero((caddr_t)sin, sizeof (*sin));
sin->sin_family = AF_INET;
@ -527,8 +552,6 @@ in_setpeeraddr(so, nam)
* cmds that are uninteresting (e.g., no error in the map).
* Call the protocol specific routine (if any) to report
* any errors for each matching socket.
*
* Must be called at splnet.
*/
void
in_pcbnotify(head, dst, fport_arg, laddr, lport_arg, cmd, notify)
@ -636,62 +659,94 @@ in_rtchange(inp, errno)
}
}
/*
* Lookup a PCB based on the local address and port.
*/
struct inpcb *
in_pcblookup(pcbinfo, faddr, fport_arg, laddr, lport_arg, wild_okay)
in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay)
struct inpcbinfo *pcbinfo;
struct in_addr faddr, laddr;
u_int fport_arg, lport_arg;
struct in_addr laddr;
u_int lport_arg;
int wild_okay;
{
register struct inpcb *inp, *match = NULL;
int matchwild = 3, wildcard;
u_short fport = fport_arg, lport = lport_arg;
int s;
u_short lport = lport_arg;
s = splnet();
for (inp = pcbinfo->listhead->lh_first; inp != NULL; inp = inp->inp_list.le_next) {
if (inp->inp_lport != lport)
continue;
wildcard = 0;
if (inp->inp_faddr.s_addr != INADDR_ANY) {
if (faddr.s_addr == INADDR_ANY)
wildcard++;
else if (inp->inp_faddr.s_addr != faddr.s_addr ||
inp->inp_fport != fport)
continue;
} else {
if (faddr.s_addr != INADDR_ANY)
wildcard++;
}
if (inp->inp_laddr.s_addr != INADDR_ANY) {
if (laddr.s_addr == INADDR_ANY)
wildcard++;
else if (inp->inp_laddr.s_addr != laddr.s_addr)
continue;
} else {
if (laddr.s_addr != INADDR_ANY)
wildcard++;
}
if (wildcard && wild_okay == 0)
continue;
if (wildcard < matchwild) {
match = inp;
matchwild = wildcard;
if (matchwild == 0) {
break;
if (!wild_okay) {
struct inpcbhead *head;
/*
* Look for an unconnected (wildcard foreign addr) PCB that
* matches the local address and port we're looking for.
*/
head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
if (inp->inp_faddr.s_addr == INADDR_ANY &&
inp->inp_laddr.s_addr == laddr.s_addr &&
inp->inp_lport == lport) {
/*
* Found.
*/
return (inp);
}
}
/*
* Not found.
*/
return (NULL);
} else {
struct inpcbporthead *porthash;
struct inpcbport *phd;
struct inpcb *match = NULL;
/*
* Best fit PCB lookup.
*
* First see if this local port is in use by looking on the
* port hash list.
*/
porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
pcbinfo->porthashmask)];
for (phd = porthash->lh_first; phd != NULL; phd = phd->phd_hash.le_next) {
if (phd->phd_port == lport)
break;
}
if (phd != NULL) {
/*
* Port is in use by one or more PCBs. Look for best
* fit.
*/
for (inp = phd->phd_pcblist.lh_first; inp != NULL;
inp = inp->inp_portlist.le_next) {
wildcard = 0;
if (inp->inp_faddr.s_addr != INADDR_ANY)
wildcard++;
if (inp->inp_laddr.s_addr != INADDR_ANY) {
if (laddr.s_addr == INADDR_ANY)
wildcard++;
else if (inp->inp_laddr.s_addr != laddr.s_addr)
continue;
} else {
if (laddr.s_addr != INADDR_ANY)
wildcard++;
}
if (wildcard < matchwild) {
match = inp;
matchwild = wildcard;
if (matchwild == 0) {
break;
}
}
}
}
return (match);
}
splx(s);
return (match);
}
/*
* Lookup PCB in hash list.
*/
struct inpcb *
in_pcblookuphash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard)
in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard)
struct inpcbinfo *pcbinfo;
struct in_addr faddr, laddr;
u_int fport_arg, lport_arg;
@ -700,9 +755,7 @@ in_pcblookuphash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard)
struct inpcbhead *head;
register struct inpcb *inp;
u_short fport = fport_arg, lport = lport_arg;
int s;
s = splnet();
/*
* First look for an exact match.
*/
@ -711,8 +764,12 @@ in_pcblookuphash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard)
if (inp->inp_faddr.s_addr == faddr.s_addr &&
inp->inp_laddr.s_addr == laddr.s_addr &&
inp->inp_fport == fport &&
inp->inp_lport == lport)
goto found;
inp->inp_lport == lport) {
/*
* Found.
*/
return (inp);
}
}
if (wildcard) {
struct inpcb *local_wild = NULL;
@ -720,64 +777,100 @@ in_pcblookuphash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard)
head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
if (inp->inp_faddr.s_addr == INADDR_ANY &&
inp->inp_fport == 0 && inp->inp_lport == lport) {
inp->inp_lport == lport) {
if (inp->inp_laddr.s_addr == laddr.s_addr)
goto found;
return (inp);
else if (inp->inp_laddr.s_addr == INADDR_ANY)
local_wild = inp;
}
}
if (local_wild != NULL) {
inp = local_wild;
goto found;
}
return (local_wild);
}
splx(s);
return (NULL);
found:
/*
* Move PCB to head of this hash chain so that it can be
* found more quickly in the future.
* XXX - this is a pessimization on machines with few
* concurrent connections.
* Not found.
*/
if (inp != head->lh_first) {
LIST_REMOVE(inp, inp_hash);
LIST_INSERT_HEAD(head, inp, inp_hash);
}
splx(s);
return (inp);
return (NULL);
}
/*
* Insert PCB into hash chain. Must be called at splnet.
* Insert PCB onto various hash lists.
*/
static void
int
in_pcbinshash(inp)
struct inpcb *inp;
{
struct inpcbhead *head;
struct inpcbhead *pcbhash;
struct inpcbporthead *pcbporthash;
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
struct inpcbport *phd;
head = &inp->inp_pcbinfo->hashbase[INP_PCBHASH(inp->inp_faddr.s_addr,
inp->inp_lport, inp->inp_fport, inp->inp_pcbinfo->hashmask)];
pcbhash = &pcbinfo->hashbase[INP_PCBHASH(inp->inp_faddr.s_addr,
inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)];
LIST_INSERT_HEAD(head, inp, inp_hash);
pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport,
pcbinfo->porthashmask)];
/*
* Go through port list and look for a head for this lport.
*/
for (phd = pcbporthash->lh_first; phd != NULL; phd = phd->phd_hash.le_next) {
if (phd->phd_port == inp->inp_lport)
break;
}
/*
* If none exists, malloc one and tack it on.
*/
if (phd == NULL) {
MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_NOWAIT);
if (phd == NULL) {
return (ENOBUFS); /* XXX */
}
phd->phd_port = inp->inp_lport;
LIST_INIT(&phd->phd_pcblist);
LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
}
inp->inp_phd = phd;
LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
return (0);
}
/*
* Move PCB to the proper hash bucket when { faddr, fport } have been
* changed. NOTE: This does not handle the case of the lport changing (the
* hashed port list would have to be updated as well), so the lport must
* not change after in_pcbinshash() has been called.
*/
void
in_pcbrehash(inp)
struct inpcb *inp;
{
struct inpcbhead *head;
int s;
s = splnet();
LIST_REMOVE(inp, inp_hash);
head = &inp->inp_pcbinfo->hashbase[INP_PCBHASH(inp->inp_faddr.s_addr,
inp->inp_lport, inp->inp_fport, inp->inp_pcbinfo->hashmask)];
LIST_REMOVE(inp, inp_hash);
LIST_INSERT_HEAD(head, inp, inp_hash);
splx(s);
}
/*
 * Unlink a PCB from every list it may be on.
 *
 * A PCB with a non-zero local port is taken off its hash chain and off
 * the PCB list of its port head (inp_phd); if that leaves the port head
 * with no PCBs, the port head itself is unhooked from the port hash and
 * freed.  Regardless of the local port, the PCB is always removed from
 * the list linked through inp_list.
 */
static void
in_pcbremlists(inp)
	struct inpcb *inp;
{
	struct inpcbport *porthead;

	if (inp->inp_lport != 0) {
		/* Remember the port head before the PCB is unlinked from it. */
		porthead = inp->inp_phd;

		LIST_REMOVE(inp, inp_hash);
		LIST_REMOVE(inp, inp_portlist);

		/* That was the last PCB on this port: retire the port head. */
		if (porthead->phd_pcblist.lh_first == NULL) {
			LIST_REMOVE(porthead, phd_hash);
			free(porthead, M_PCB);
		}
	}

	LIST_REMOVE(inp, inp_list);
}

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)in_pcb.h 8.1 (Berkeley) 6/10/93
* $Id: in_pcb.h,v 1.21 1997/04/27 20:01:04 wollman Exp $
* $Id: in_pcb.h,v 1.22 1997/08/16 19:15:36 wollman Exp $
*/
#ifndef _NETINET_IN_PCB_H_
@ -47,16 +47,17 @@
* control block.
*/
LIST_HEAD(inpcbhead, inpcb);
LIST_HEAD(inpcbporthead, inpcbport);
struct inpcb {
LIST_ENTRY(inpcb) inp_list; /* list for all PCBs of this proto */
LIST_ENTRY(inpcb) inp_hash; /* hash list */
struct inpcbinfo *inp_pcbinfo; /* PCB list info */
LIST_ENTRY(inpcb) inp_hash; /* hash list */
struct in_addr inp_faddr; /* foreign host table entry */
struct in_addr inp_laddr; /* local host table entry */
u_short inp_fport; /* foreign port */
u_short inp_lport; /* local port */
LIST_ENTRY(inpcb) inp_list; /* list for all PCBs of this proto */
caddr_t inp_ppcb; /* pointer to per-protocol pcb */
struct inpcbinfo *inp_pcbinfo; /* PCB list info */
struct socket *inp_socket; /* back pointer to socket */
struct mbuf *inp_options; /* IP options */
struct route inp_route; /* placeholder for routing entry */
@ -66,22 +67,31 @@ struct inpcb {
u_char inp_ip_p; /* protocol proto */
u_char pad[1]; /* alignment */
struct ip_moptions *inp_moptions; /* IP multicast options */
#if 0 /* Someday, perhaps... */
struct ip inp_ip; /* header prototype; should have more */
#endif
LIST_ENTRY(inpcb) inp_portlist; /* list for this PCB's local port */
struct inpcbport *inp_phd; /* head of list for this PCB's local port */
};
/*
 * Head of the list of PCBs that share one local port.  in_pcbinshash()
 * allocates one of these the first time a given local port is hashed and
 * in_pcbremlists() frees it once its PCB list becomes empty.
 */
struct inpcbport {
LIST_ENTRY(inpcbport) phd_hash; /* linkage on a port hash bucket chain */
struct inpcbhead phd_pcblist; /* PCBs whose local port is phd_port */
u_short phd_port; /* local port; copied from inp_lport */
};
struct inpcbinfo {
struct inpcbhead *listhead;
struct inpcbhead *hashbase;
unsigned long hashmask;
struct inpcbporthead *porthashbase;
unsigned long porthashmask;
struct inpcbhead *listhead;
unsigned short lastport;
unsigned short lastlow;
unsigned short lasthi;
};
#define INP_PCBHASH(faddr, lport, fport, mask) \
(((faddr) ^ ((faddr) >> 16) ^ (lport) ^ (fport)) & (mask))
(((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask))
/*
 * Compute the port hash bucket index for a local port: the port is passed
 * through ntohs() and then masked.  Callers index porthashbase with the
 * result, passing porthashmask as the mask.
 */
#define INP_PCBPORTHASH(lport, mask) \
(ntohs((lport)) & (mask))
/* flags in inp_flags: */
#define INP_RECVOPTS 0x01 /* receive incoming IP options */
@ -101,25 +111,26 @@ struct inpcbinfo {
#define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb)
#ifdef KERNEL
void in_losing __P((struct inpcb *));
int in_pcballoc __P((struct socket *, struct inpcbinfo *, struct proc *));
int in_pcbbind __P((struct inpcb *, struct sockaddr *, struct proc *));
int in_pcbconnect __P((struct inpcb *, struct sockaddr *, struct proc *));
void in_pcbdetach __P((struct inpcb *));
void in_pcbdisconnect __P((struct inpcb *));
int in_pcbladdr __P((struct inpcb *, struct sockaddr *,
void in_losing __P((struct inpcb *));
int in_pcballoc __P((struct socket *, struct inpcbinfo *, struct proc *));
int in_pcbbind __P((struct inpcb *, struct sockaddr *, struct proc *));
int in_pcbconnect __P((struct inpcb *, struct sockaddr *, struct proc *));
void in_pcbdetach __P((struct inpcb *));
void in_pcbdisconnect __P((struct inpcb *));
int in_pcbinshash __P((struct inpcb *));
int in_pcbladdr __P((struct inpcb *, struct sockaddr *,
struct sockaddr_in **));
struct inpcb *
in_pcblookup __P((struct inpcbinfo *,
struct in_addr, u_int, struct in_addr, u_int, int));
in_pcblookup_local __P((struct inpcbinfo *,
struct in_addr, u_int, int));
struct inpcb *
in_pcblookuphash __P((struct inpcbinfo *,
in_pcblookup_hash __P((struct inpcbinfo *,
struct in_addr, u_int, struct in_addr, u_int, int));
void in_pcbnotify __P((struct inpcbhead *, struct sockaddr *,
void in_pcbnotify __P((struct inpcbhead *, struct sockaddr *,
u_int, struct in_addr, u_int, int, void (*)(struct inpcb *, int)));
void in_pcbrehash __P((struct inpcb *));
int in_setpeeraddr __P((struct socket *so, struct sockaddr **nam));
int in_setsockaddr __P((struct socket *so, struct sockaddr **nam));
void in_pcbrehash __P((struct inpcb *));
int in_setpeeraddr __P((struct socket *so, struct sockaddr **nam));
int in_setsockaddr __P((struct socket *so, struct sockaddr **nam));
#endif
#endif

View File

@ -30,7 +30,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $Id: ip_divert.c,v 1.16 1997/12/18 09:13:34 davidg Exp $
* $Id: ip_divert.c,v 1.17 1998/01/08 23:41:50 eivind Exp $
*/
#include "opt_inet.h"
@ -115,6 +115,7 @@ div_init(void)
* over the place for hashbase == NULL.
*/
divcbinfo.hashbase = hashinit(1, M_PCB, &divcbinfo.hashmask);
divcbinfo.porthashbase = hashinit(1, M_PCB, &divcbinfo.porthashmask);
}
/*

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)raw_ip.c 8.7 (Berkeley) 5/15/95
* $Id: raw_ip.c,v 1.49 1997/09/14 03:10:40 peter Exp $
* $Id: raw_ip.c,v 1.50 1997/12/18 09:13:39 davidg Exp $
*/
#include <sys/param.h>
@ -93,6 +93,7 @@ rip_init()
* over the place for hashbase == NULL.
*/
ripcbinfo.hashbase = hashinit(1, M_PCB, &ripcbinfo.hashmask);
ripcbinfo.porthashbase = hashinit(1, M_PCB, &ripcbinfo.porthashmask);
}
static struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
* $Id: tcp_input.c,v 1.67 1997/12/19 23:46:15 bde Exp $
* $Id: tcp_input.c,v 1.68 1998/01/21 02:05:59 fenner Exp $
*/
#include "opt_tcpdebug.h"
@ -105,27 +105,6 @@ static void tcp_xmit_timer __P((struct tcpcb *, int));
* Set DELACK for segments received in order, but ack immediately
* when segments are out of order (so fast retransmit can work).
*/
#ifdef TCP_ACK_HACK
#define TCP_REASS(tp, ti, m, so, flags) { \
if ((ti)->ti_seq == (tp)->rcv_nxt && \
(tp)->seg_next == (struct tcpiphdr *)(tp) && \
(tp)->t_state == TCPS_ESTABLISHED) { \
if (ti->ti_flags & TH_PUSH) \
tp->t_flags |= TF_ACKNOW; \
else \
tp->t_flags |= TF_DELACK; \
(tp)->rcv_nxt += (ti)->ti_len; \
flags = (ti)->ti_flags & TH_FIN; \
tcpstat.tcps_rcvpack++;\
tcpstat.tcps_rcvbyte += (ti)->ti_len;\
sbappend(&(so)->so_rcv, (m)); \
sorwakeup(so); \
} else { \
(flags) = tcp_reass((tp), (ti), (m)); \
tp->t_flags |= TF_ACKNOW; \
} \
}
#else
#define TCP_REASS(tp, ti, m, so, flags) { \
if ((ti)->ti_seq == (tp)->rcv_nxt && \
(tp)->seg_next == (struct tcpiphdr *)(tp) && \
@ -142,7 +121,6 @@ static void tcp_xmit_timer __P((struct tcpcb *, int));
tp->t_flags |= TF_ACKNOW; \
} \
}
#endif
#ifndef TUBA_INCLUDE
static int
@ -358,7 +336,7 @@ tcp_input(m, iphlen)
* Locate pcb for segment.
*/
findpcb:
inp = in_pcblookuphash(&tcbinfo, ti->ti_src, ti->ti_sport,
inp = in_pcblookup_hash(&tcbinfo, ti->ti_src, ti->ti_sport,
ti->ti_dst, ti->ti_dport, 1);
/*
@ -440,10 +418,16 @@ tcp_input(m, iphlen)
inp = (struct inpcb *)so->so_pcb;
inp->inp_laddr = ti->ti_dst;
inp->inp_lport = ti->ti_dport;
in_pcbrehash(inp);
#if BSD>=43
if (in_pcbinshash(inp) != 0) {
/*
* Undo the assignments above if we failed to put
* the PCB on the hash lists.
*/
inp->inp_laddr.s_addr = INADDR_ANY;
inp->inp_lport = 0;
goto drop;
}
inp->inp_options = ip_srcroute();
#endif
tp = intotcpcb(inp);
tp->t_state = TCPS_LISTEN;
tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT);

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
* $Id: tcp_input.c,v 1.67 1997/12/19 23:46:15 bde Exp $
* $Id: tcp_input.c,v 1.68 1998/01/21 02:05:59 fenner Exp $
*/
#include "opt_tcpdebug.h"
@ -105,27 +105,6 @@ static void tcp_xmit_timer __P((struct tcpcb *, int));
* Set DELACK for segments received in order, but ack immediately
* when segments are out of order (so fast retransmit can work).
*/
#ifdef TCP_ACK_HACK
#define TCP_REASS(tp, ti, m, so, flags) { \
if ((ti)->ti_seq == (tp)->rcv_nxt && \
(tp)->seg_next == (struct tcpiphdr *)(tp) && \
(tp)->t_state == TCPS_ESTABLISHED) { \
if (ti->ti_flags & TH_PUSH) \
tp->t_flags |= TF_ACKNOW; \
else \
tp->t_flags |= TF_DELACK; \
(tp)->rcv_nxt += (ti)->ti_len; \
flags = (ti)->ti_flags & TH_FIN; \
tcpstat.tcps_rcvpack++;\
tcpstat.tcps_rcvbyte += (ti)->ti_len;\
sbappend(&(so)->so_rcv, (m)); \
sorwakeup(so); \
} else { \
(flags) = tcp_reass((tp), (ti), (m)); \
tp->t_flags |= TF_ACKNOW; \
} \
}
#else
#define TCP_REASS(tp, ti, m, so, flags) { \
if ((ti)->ti_seq == (tp)->rcv_nxt && \
(tp)->seg_next == (struct tcpiphdr *)(tp) && \
@ -142,7 +121,6 @@ static void tcp_xmit_timer __P((struct tcpcb *, int));
tp->t_flags |= TF_ACKNOW; \
} \
}
#endif
#ifndef TUBA_INCLUDE
static int
@ -358,7 +336,7 @@ tcp_input(m, iphlen)
* Locate pcb for segment.
*/
findpcb:
inp = in_pcblookuphash(&tcbinfo, ti->ti_src, ti->ti_sport,
inp = in_pcblookup_hash(&tcbinfo, ti->ti_src, ti->ti_sport,
ti->ti_dst, ti->ti_dport, 1);
/*
@ -440,10 +418,16 @@ tcp_input(m, iphlen)
inp = (struct inpcb *)so->so_pcb;
inp->inp_laddr = ti->ti_dst;
inp->inp_lport = ti->ti_dport;
in_pcbrehash(inp);
#if BSD>=43
if (in_pcbinshash(inp) != 0) {
/*
* Undo the assignments above if we failed to put
* the PCB on the hash lists.
*/
inp->inp_laddr.s_addr = INADDR_ANY;
inp->inp_lport = 0;
goto drop;
}
inp->inp_options = ip_srcroute();
#endif
tp = intotcpcb(inp);
tp->t_state = TCPS_LISTEN;
tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT);

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95
* $Id: tcp_subr.c,v 1.40 1997/12/19 03:36:14 julian Exp $
* $Id: tcp_subr.c,v 1.41 1998/01/25 04:23:32 eivind Exp $
*/
#include "opt_compat.h"
@ -87,11 +87,10 @@ static void tcp_cleartaocache __P((void));
static void tcp_notify __P((struct inpcb *, int));
/*
* Target size of TCP PCB hash table. Will be rounded down to a prime
* number.
* Target size of TCP PCB hash tables. Must be a power of two.
*/
#ifndef TCBHASHSIZE
#define TCBHASHSIZE 128
#define TCBHASHSIZE 512
#endif
/*
@ -107,6 +106,7 @@ tcp_init()
LIST_INIT(&tcb);
tcbinfo.listhead = &tcb;
tcbinfo.hashbase = hashinit(TCBHASHSIZE, M_PCB, &tcbinfo.hashmask);
tcbinfo.porthashbase = hashinit(TCBHASHSIZE, M_PCB, &tcbinfo.porthashmask);
if (max_protohdr < sizeof(struct tcpiphdr))
max_protohdr = sizeof(struct tcpiphdr);
if (max_linkhdr + sizeof(struct tcpiphdr) > MHLEN)
@ -417,8 +417,8 @@ tcp_close(tp)
}
if (tp->t_template)
(void) m_free(dtom(tp->t_template));
inp->inp_ppcb = NULL;
free(tp, M_PCB);
inp->inp_ppcb = 0;
soisdisconnected(so);
in_pcbdetach(inp);
tcpstat.tcps_closed++;

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95
* $Id: tcp_subr.c,v 1.40 1997/12/19 03:36:14 julian Exp $
* $Id: tcp_subr.c,v 1.41 1998/01/25 04:23:32 eivind Exp $
*/
#include "opt_compat.h"
@ -87,11 +87,10 @@ static void tcp_cleartaocache __P((void));
static void tcp_notify __P((struct inpcb *, int));
/*
* Target size of TCP PCB hash table. Will be rounded down to a prime
* number.
* Target size of TCP PCB hash tables. Must be a power of two.
*/
#ifndef TCBHASHSIZE
#define TCBHASHSIZE 128
#define TCBHASHSIZE 512
#endif
/*
@ -107,6 +106,7 @@ tcp_init()
LIST_INIT(&tcb);
tcbinfo.listhead = &tcb;
tcbinfo.hashbase = hashinit(TCBHASHSIZE, M_PCB, &tcbinfo.hashmask);
tcbinfo.porthashbase = hashinit(TCBHASHSIZE, M_PCB, &tcbinfo.porthashmask);
if (max_protohdr < sizeof(struct tcpiphdr))
max_protohdr = sizeof(struct tcpiphdr);
if (max_linkhdr + sizeof(struct tcpiphdr) > MHLEN)
@ -417,8 +417,8 @@ tcp_close(tp)
}
if (tp->t_template)
(void) m_free(dtom(tp->t_template));
inp->inp_ppcb = NULL;
free(tp, M_PCB);
inp->inp_ppcb = 0;
soisdisconnected(so);
in_pcbdetach(inp);
tcpstat.tcps_closed++;

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94
* $Id: tcp_usrreq.c,v 1.35 1997/09/16 18:36:06 joerg Exp $
* $Id: tcp_usrreq.c,v 1.36 1997/12/18 09:50:38 davidg Exp $
*/
#include "opt_tcpdebug.h"
@ -483,9 +483,9 @@ tcp_connect(tp, nam, p)
struct tcpcb *otp;
struct sockaddr_in *sin = (struct sockaddr_in *)nam;
struct sockaddr_in *ifaddr;
int error;
struct rmxp_tao *taop;
struct rmxp_tao tao_noncached;
int error;
if (inp->inp_lport == 0) {
error = in_pcbbind(inp, (struct sockaddr *)0, p);
@ -501,7 +501,7 @@ tcp_connect(tp, nam, p)
error = in_pcbladdr(inp, nam, &ifaddr);
if (error)
return error;
oinp = in_pcblookuphash(inp->inp_pcbinfo,
oinp = in_pcblookup_hash(inp->inp_pcbinfo,
sin->sin_addr, sin->sin_port,
inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr
: ifaddr->sin_addr,

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_var.h 8.4 (Berkeley) 5/24/95
* $Id: tcp_var.h,v 1.38 1997/02/22 09:41:43 peter Exp $
* $Id: tcp_var.h,v 1.39 1997/04/27 20:01:15 wollman Exp $
*/
#ifndef _NETINET_TCP_VAR_H_
@ -42,18 +42,18 @@
/*
* Tcp control block, one per tcp; fields:
* Organized for 16 byte cacheline efficiency.
*/
struct tcpcb {
struct tcpiphdr *seg_next; /* sequencing queue */
struct tcpiphdr *seg_prev;
int t_state; /* state of this connection */
int t_timer[TCPT_NTIMERS]; /* tcp timers */
int t_rxtshift; /* log(2) of rexmt exp. backoff */
int t_rxtcur; /* current retransmit value */
int t_dupacks; /* consecutive dup acks recd */
u_int t_maxseg; /* maximum segment size */
u_int t_maxopd; /* mss plus options */
int t_force; /* 1 if forcing out a byte */
struct tcpiphdr *t_template; /* skeletal packet for transmit */
int t_timer[TCPT_NTIMERS]; /* tcp timers */
struct inpcb *t_inpcb; /* back pointer to internet pcb */
int t_state; /* state of this connection */
u_int t_flags;
#define TF_ACKNOW 0x0001 /* ack peer immediately */
#define TF_DELACK 0x0002 /* ack, but try to delay it */
@ -71,77 +71,69 @@ struct tcpcb {
#define TF_REQ_CC 0x2000 /* have/will request CC */
#define TF_RCVD_CC 0x4000 /* a CC was received in SYN */
#define TF_SENDCCNEW 0x8000 /* send CCnew instead of CC in SYN */
int t_force; /* 1 if forcing out a byte */
struct tcpiphdr *t_template; /* skeletal packet for transmit */
struct inpcb *t_inpcb; /* back pointer to internet pcb */
/*
* The following fields are used as in the protocol specification.
* See RFC783, Dec. 1981, page 21.
*/
/* send sequence variables */
tcp_seq snd_una; /* send unacknowledged */
tcp_seq snd_nxt; /* send next */
tcp_seq snd_up; /* send urgent pointer */
tcp_seq snd_wl1; /* window update seg seq number */
tcp_seq snd_wl2; /* window update seg ack number */
tcp_seq iss; /* initial send sequence number */
u_long snd_wnd; /* send window */
/* receive sequence variables */
u_long rcv_wnd; /* receive window */
tcp_seq rcv_nxt; /* receive next */
tcp_seq rcv_up; /* receive urgent pointer */
tcp_seq irs; /* initial receive sequence number */
/*
* Additional variables for this implementation.
*/
/* receive variables */
tcp_seq rcv_adv; /* advertised window */
/* retransmit variables */
tcp_seq snd_max; /* highest sequence number sent;
* used to recognize retransmits
*/
/* congestion control (for slow start, source quench, retransmit after loss) */
tcp_seq snd_nxt; /* send next */
tcp_seq snd_up; /* send urgent pointer */
tcp_seq snd_wl1; /* window update seg seq number */
tcp_seq snd_wl2; /* window update seg ack number */
tcp_seq iss; /* initial send sequence number */
tcp_seq irs; /* initial receive sequence number */
tcp_seq rcv_nxt; /* receive next */
tcp_seq rcv_adv; /* advertised window */
u_long rcv_wnd; /* receive window */
tcp_seq rcv_up; /* receive urgent pointer */
u_long snd_wnd; /* send window */
u_long snd_cwnd; /* congestion-controlled window */
u_long snd_ssthresh; /* snd_cwnd size threshold for
* for slow start exponential to
* linear switch
*/
/*
* transmit timing stuff. See below for scale of srtt and rttvar.
* "Variance" is actually smoothed difference.
*/
u_int t_maxopd; /* mss plus options */
u_int t_idle; /* inactivity time */
u_long t_duration; /* connection duration */
int t_rtt; /* round trip time */
tcp_seq t_rtseq; /* sequence number being timed */
int t_rxtcur; /* current retransmit value */
u_int t_maxseg; /* maximum segment size */
int t_srtt; /* smoothed round-trip time */
int t_rttvar; /* variance in round-trip time */
int t_rxtshift; /* log(2) of rexmt exp. backoff */
u_int t_rttmin; /* minimum rtt allowed */
u_long t_rttupdated; /* number of times rtt sampled */
u_long max_sndwnd; /* largest window peer has offered */
int t_softerror; /* possible error not yet reported */
/* out-of-band data */
char t_oobflags; /* have some */
char t_iobc; /* input character */
#define TCPOOB_HAVEDATA 0x01
#define TCPOOB_HADDATA 0x02
int t_softerror; /* possible error not yet reported */
/* RFC 1323 variables */
u_char snd_scale; /* window scaling for send window */
u_char rcv_scale; /* window scaling for recv window */
u_char request_r_scale; /* pending window scaling */
u_char requested_s_scale;
u_long ts_recent; /* timestamp echo data */
u_long ts_recent_age; /* when last updated */
tcp_seq last_ack_sent;
/* RFC 1644 variables */
tcp_cc cc_send; /* send connection count */
tcp_cc cc_recv; /* receive connection count */
u_long t_duration; /* connection duration */
/* TUBA stuff */
caddr_t t_tuba_pcb; /* next level down pcb for TCP over z */
/* More RTT stuff */
u_long t_rttupdated; /* number of times rtt sampled */
};
/*

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95
* $Id: udp_usrreq.c,v 1.42 1997/12/19 23:46:21 bde Exp $
* $Id: udp_usrreq.c,v 1.43 1998/01/25 17:25:41 steve Exp $
*/
#include <sys/param.h>
@ -78,7 +78,7 @@ static struct inpcbhead udb; /* from udp_var.h */
static struct inpcbinfo udbinfo;
#ifndef UDBHASHSIZE
#define UDBHASHSIZE 64
#define UDBHASHSIZE 16
#endif
static struct udpstat udpstat; /* from udp_var.h */
@ -97,6 +97,7 @@ udp_init()
LIST_INIT(&udb);
udbinfo.listhead = &udb;
udbinfo.hashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.hashmask);
udbinfo.porthashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.porthashmask);
}
void
@ -274,7 +275,7 @@ udp_input(m, iphlen)
/*
* Locate pcb for datagram.
*/
inp = in_pcblookuphash(&udbinfo, ip->ip_src, uh->uh_sport,
inp = in_pcblookup_hash(&udbinfo, ip->ip_src, uh->uh_sport,
ip->ip_dst, uh->uh_dport, 1);
if (inp == NULL) {
if (log_in_vain) {