Consolidate insertion of TCP options into a segment from within tcp_output()
and syncache_respond() into its own generic function tcp_addoptions().

tcp_addoptions() is alignment agnostic and does optimal packing in all cases.

In struct tcpopt rename to_requested_s_scale to just to_wscale.

Add a comment with quote from RFC1323: "The Window field in a SYN (i.e., a
<SYN> or <SYN,ACK>) segment itself is never scaled."

Reviewed by: silby, mohans, julian
Sponsored by: TCP/IP Optimization Fundraise 2005
This commit is contained in:
parent
7a5897d4d9
commit
02a1a64357
@ -76,14 +76,17 @@ struct tcphdr {
|
||||
};
|
||||
|
||||
#define TCPOPT_EOL 0
|
||||
#define TCPOLEN_EOL 1
|
||||
#define TCPOPT_NOP 1
|
||||
#define TCPOLEN_NOP 1
|
||||
#define TCPOPT_MAXSEG 2
|
||||
#define TCPOLEN_MAXSEG 4
|
||||
#define TCPOPT_WINDOW 3
|
||||
#define TCPOLEN_WINDOW 3
|
||||
#define TCPOPT_SACK_PERMITTED 4 /* Experimental */
|
||||
#define TCPOPT_SACK_PERMITTED 4
|
||||
#define TCPOLEN_SACK_PERMITTED 2
|
||||
#define TCPOPT_SACK 5 /* Experimental */
|
||||
#define TCPOPT_SACK 5
|
||||
#define TCPOLEN_SACKHDR 2
|
||||
#define TCPOLEN_SACK 8 /* 2*sizeof(tcp_seq) */
|
||||
#define TCPOPT_TIMESTAMP 8
|
||||
#define TCPOLEN_TIMESTAMP 10
|
||||
|
@ -1146,7 +1146,7 @@ tcp_input(m, off0)
|
||||
if ((to.to_flags & TOF_SCALE) &&
|
||||
(tp->t_flags & TF_REQ_SCALE)) {
|
||||
tp->t_flags |= TF_RCVD_SCALE;
|
||||
tp->snd_scale = to.to_requested_s_scale;
|
||||
tp->snd_scale = to.to_wscale;
|
||||
tp->snd_wnd = th->th_win << tp->snd_scale;
|
||||
tiwin = tp->snd_wnd;
|
||||
}
|
||||
@ -2745,7 +2745,7 @@ tcp_dooptions(to, cp, cnt, flags)
|
||||
if (!(flags & TO_SYN))
|
||||
continue;
|
||||
to->to_flags |= TOF_SCALE;
|
||||
to->to_requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
|
||||
to->to_wscale = min(cp[2], TCP_MAX_WINSHIFT);
|
||||
break;
|
||||
case TCPOPT_TIMESTAMP:
|
||||
if (optlen != TCPOLEN_TIMESTAMP)
|
||||
|
@ -142,10 +142,10 @@ tcp_output(struct tcpcb *tp)
|
||||
u_char opt[TCP_MAXOLEN];
|
||||
unsigned ipoptlen, optlen, hdrlen;
|
||||
int idle, sendalot;
|
||||
int i, sack_rxmit;
|
||||
int sack_bytes_rxmt;
|
||||
int sack_rxmit, sack_bytes_rxmt;
|
||||
struct sackhole *p;
|
||||
int tso = 0;
|
||||
struct tcpopt to;
|
||||
#if 0
|
||||
int maxburst = TCP_MAXBURST;
|
||||
#endif
|
||||
@ -626,157 +626,67 @@ tcp_output(struct tcpcb *tp)
|
||||
else
|
||||
#endif
|
||||
hdrlen = sizeof (struct tcpiphdr);
|
||||
if (flags & TH_SYN) {
|
||||
tp->snd_nxt = tp->iss;
|
||||
if ((tp->t_flags & TF_NOOPT) == 0) {
|
||||
u_short mss;
|
||||
|
||||
opt[0] = TCPOPT_MAXSEG;
|
||||
opt[1] = TCPOLEN_MAXSEG;
|
||||
mss = htons((u_short) tcp_mssopt(&tp->t_inpcb->inp_inc));
|
||||
(void)memcpy(opt + 2, &mss, sizeof(mss));
|
||||
optlen = TCPOLEN_MAXSEG;
|
||||
|
||||
if ((tp->t_flags & TF_REQ_SCALE) &&
|
||||
((flags & TH_ACK) == 0 ||
|
||||
(tp->t_flags & TF_RCVD_SCALE))) {
|
||||
*((u_int32_t *)(opt + optlen)) = htonl(
|
||||
TCPOPT_NOP << 24 |
|
||||
TCPOPT_WINDOW << 16 |
|
||||
TCPOLEN_WINDOW << 8 |
|
||||
tp->request_r_scale);
|
||||
optlen += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Send a timestamp and echo-reply if this is a SYN and our side
|
||||
* wants to use timestamps (TF_REQ_TSTMP is set) or both our side
|
||||
* and our peer have sent timestamps in our SYN's.
|
||||
* Compute options for segment.
|
||||
* We only have to care about SYN and established connection
|
||||
* segments. Options for SYN-ACK segments are handled in TCP
|
||||
* syncache.
|
||||
*/
|
||||
if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
|
||||
(flags & TH_RST) == 0 &&
|
||||
((flags & TH_ACK) == 0 ||
|
||||
(tp->t_flags & TF_RCVD_TSTMP))) {
|
||||
u_int32_t *lp = (u_int32_t *)(opt + optlen);
|
||||
|
||||
/* Form timestamp option as shown in appendix A of RFC 1323. */
|
||||
*lp++ = htonl(TCPOPT_TSTAMP_HDR);
|
||||
*lp++ = htonl(ticks + tp->ts_offset);
|
||||
*lp = htonl(tp->ts_recent);
|
||||
optlen += TCPOLEN_TSTAMP_APPA;
|
||||
}
|
||||
|
||||
/* Set receive buffer autosizing timestamp. */
|
||||
if (tp->rfbuf_ts == 0 && (so->so_rcv.sb_flags & SB_AUTOSIZE))
|
||||
tp->rfbuf_ts = ticks;
|
||||
|
||||
if ((tp->t_flags & TF_NOOPT) == 0) {
|
||||
to.to_flags = 0;
|
||||
/* Maximum segment size. */
|
||||
if (flags & TH_SYN) {
|
||||
tp->snd_nxt = tp->iss;
|
||||
to.to_mss = tcp_mssopt(&tp->t_inpcb->inp_inc);
|
||||
to.to_flags |= TOF_MSS;
|
||||
}
|
||||
/* Window scaling. */
|
||||
if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) {
|
||||
to.to_wscale = tp->request_r_scale;
|
||||
to.to_flags |= TOF_SCALE;
|
||||
}
|
||||
/* Timestamps. */
|
||||
if ((tp->t_flags & TF_RCVD_TSTMP) ||
|
||||
((flags & TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) {
|
||||
to.to_tsval = ticks + tp->ts_offset;
|
||||
to.to_tsecr = tp->ts_recent;
|
||||
to.to_flags |= TOF_TS;
|
||||
/* Set receive buffer autosizing timestamp. */
|
||||
if (tp->rfbuf_ts == 0 &&
|
||||
(so->so_rcv.sb_flags & SB_AUTOSIZE))
|
||||
tp->rfbuf_ts = ticks;
|
||||
}
|
||||
/* Selective ACK's. */
|
||||
if (tp->sack_enable) {
|
||||
if (flags & TH_SYN)
|
||||
to.to_flags |= TOF_SACKPERM;
|
||||
else if (TCPS_HAVEESTABLISHED(tp->t_state) &&
|
||||
(tp->t_flags & TF_SACK_PERMIT) &&
|
||||
tp->rcv_numsacks > 0) {
|
||||
to.to_flags |= TOF_SACK;
|
||||
to.to_nsacks = tp->rcv_numsacks;
|
||||
to.to_sacks = (u_char *)tp->sackblks;
|
||||
}
|
||||
}
|
||||
#ifdef TCP_SIGNATURE
|
||||
/* TCP-MD5 (RFC2385). */
|
||||
#ifdef INET6
|
||||
if (!isipv6)
|
||||
#endif
|
||||
if (tp->t_flags & TF_SIGNATURE) {
|
||||
int i;
|
||||
u_char *bp;
|
||||
|
||||
/* Initialize TCP-MD5 option (RFC2385) */
|
||||
bp = (u_char *)opt + optlen;
|
||||
*bp++ = TCPOPT_SIGNATURE;
|
||||
*bp++ = TCPOLEN_SIGNATURE;
|
||||
sigoff = optlen + 2;
|
||||
for (i = 0; i < TCP_SIGLEN; i++)
|
||||
*bp++ = 0;
|
||||
optlen += TCPOLEN_SIGNATURE;
|
||||
}
|
||||
if (!isipv6 && (tp->t_flags & TF_SIGNATURE))
|
||||
#else
|
||||
if (tp->t_flags & TF_SIGNATURE)
|
||||
#endif /* INET6 */
|
||||
to.to_flags |= TOF_SIGNATURE;
|
||||
#endif /* TCP_SIGNATURE */
|
||||
|
||||
if (tp->sack_enable && ((tp->t_flags & TF_NOOPT) == 0)) {
|
||||
/*
|
||||
* Tack on the SACK permitted option *last*.
|
||||
* And do padding of options after tacking this on.
|
||||
* This is because of MSS, TS, WinScale and Signatures are
|
||||
* all present, we have just 2 bytes left for the SACK
|
||||
* permitted option, which is just enough.
|
||||
*/
|
||||
/*
|
||||
* If this is the first SYN of connection (not a SYN
|
||||
* ACK), include SACK permitted option. If this is a
|
||||
* SYN ACK, include SACK permitted option if peer has
|
||||
* already done so. This is only for active connect,
|
||||
* since the syncache takes care of the passive connect.
|
||||
*/
|
||||
if ((flags & TH_SYN) &&
|
||||
(!(flags & TH_ACK) || (tp->t_flags & TF_SACK_PERMIT))) {
|
||||
u_char *bp;
|
||||
bp = (u_char *)opt + optlen;
|
||||
/* Processing the options. */
|
||||
hdrlen += optlen = tcp_addoptions(&to, (u_char *)&opt);
|
||||
|
||||
*bp++ = TCPOPT_SACK_PERMITTED;
|
||||
*bp++ = TCPOLEN_SACK_PERMITTED;
|
||||
optlen += TCPOLEN_SACK_PERMITTED;
|
||||
}
|
||||
|
||||
/*
|
||||
* Send SACKs if necessary. This should be the last
|
||||
* option processed. Only as many SACKs are sent as
|
||||
* are permitted by the maximum options size.
|
||||
*
|
||||
* In general, SACK blocks consume 8*n+2 bytes.
|
||||
* So a full size SACK blocks option is 34 bytes
|
||||
* (to generate 4 SACK blocks). At a minimum,
|
||||
* we need 10 bytes (to generate 1 SACK block).
|
||||
* If TCP Timestamps (12 bytes) and TCP Signatures
|
||||
* (18 bytes) are both present, we'll just have
|
||||
* 10 bytes for SACK options 40 - (12 + 18).
|
||||
*/
|
||||
if (TCPS_HAVEESTABLISHED(tp->t_state) &&
|
||||
(tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0 &&
|
||||
MAX_TCPOPTLEN - optlen - 2 >= TCPOLEN_SACK) {
|
||||
int nsack, sackoptlen, padlen;
|
||||
u_char *bp = (u_char *)opt + optlen;
|
||||
u_int32_t *lp;
|
||||
|
||||
nsack = (MAX_TCPOPTLEN - optlen - 2) / TCPOLEN_SACK;
|
||||
nsack = min(nsack, tp->rcv_numsacks);
|
||||
sackoptlen = (2 + nsack * TCPOLEN_SACK);
|
||||
|
||||
/*
|
||||
* First we need to pad options so that the
|
||||
* SACK blocks can start at a 4-byte boundary
|
||||
* (sack option and length are at a 2 byte offset).
|
||||
*/
|
||||
padlen = (MAX_TCPOPTLEN - optlen - sackoptlen) % 4;
|
||||
optlen += padlen;
|
||||
while (padlen-- > 0)
|
||||
*bp++ = TCPOPT_NOP;
|
||||
|
||||
tcpstat.tcps_sack_send_blocks++;
|
||||
*bp++ = TCPOPT_SACK;
|
||||
*bp++ = sackoptlen;
|
||||
lp = (u_int32_t *)bp;
|
||||
for (i = 0; i < nsack; i++) {
|
||||
struct sackblk sack = tp->sackblks[i];
|
||||
*lp++ = htonl(sack.start);
|
||||
*lp++ = htonl(sack.end);
|
||||
}
|
||||
optlen += sackoptlen;
|
||||
}
|
||||
#ifdef TCP_SIGNATURE
|
||||
sigoff = to.to_signature - (u_char *)&to;
|
||||
#endif /* TCP_SIGNATURE */
|
||||
}
|
||||
|
||||
/* Pad TCP options to a 4 byte boundary */
|
||||
if (optlen < MAX_TCPOPTLEN && (optlen % sizeof(u_int32_t))) {
|
||||
int pad = sizeof(u_int32_t) - (optlen % sizeof(u_int32_t));
|
||||
u_char *bp = (u_char *)opt + optlen;
|
||||
|
||||
optlen += pad;
|
||||
while (pad) {
|
||||
*bp++ = TCPOPT_EOL;
|
||||
pad--;
|
||||
}
|
||||
}
|
||||
|
||||
hdrlen += optlen;
|
||||
|
||||
#ifdef INET6
|
||||
if (isipv6)
|
||||
ipoptlen = ip6_optlen(tp->t_inpcb);
|
||||
@ -876,11 +786,11 @@ tcp_output(struct tcpcb *tp)
|
||||
m->m_data += max_linkhdr;
|
||||
m->m_len = hdrlen;
|
||||
if (len <= MHLEN - hdrlen - max_linkhdr) {
|
||||
m_copydata(so->so_snd.sb_mb, off, (int) len,
|
||||
m_copydata(so->so_snd.sb_mb, off, (int)len,
|
||||
mtod(m, caddr_t) + hdrlen);
|
||||
m->m_len += len;
|
||||
} else {
|
||||
m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len);
|
||||
m->m_next = m_copy(so->so_snd.sb_mb, off, (int)len);
|
||||
if (m->m_next == 0) {
|
||||
SOCKBUF_UNLOCK(&so->so_snd);
|
||||
(void) m_free(m);
|
||||
@ -983,6 +893,9 @@ tcp_output(struct tcpcb *tp)
|
||||
/*
|
||||
* Calculate receive window. Don't shrink window,
|
||||
* but avoid silly window syndrome.
|
||||
*
|
||||
* XXX: RFC1323: The Window field in a SYN (i.e., a <SYN> or
|
||||
* <SYN,ACK>) segment itself is never scaled.
|
||||
*/
|
||||
if (recwin < (long)(so->so_rcv.sb_hiwat / 4) &&
|
||||
recwin < (long)tp->t_maxseg)
|
||||
@ -1320,3 +1233,143 @@ tcp_setpersist(tp)
|
||||
if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
|
||||
tp->t_rxtshift++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert TCP options according to the supplied parameters to the place
|
||||
* optp in a consistent way. Can handle unaligned destinations.
|
||||
*
|
||||
* The order of the option processing is crucial for optimal packing and
|
||||
* alignment for the scarce option space.
|
||||
*
|
||||
* The optimal order for a SYN/SYN-ACK segment is:
|
||||
* MSS (4) + NOP (1) + Window scale (3) + SACK permitted (2) +
|
||||
* Timestamp (10) + Signature (18) = 38 bytes out of a maximum of 40.
|
||||
*
|
||||
* The SACK options should be last. SACK blocks consume 8*n+2 bytes.
|
||||
* So a full size SACK blocks option is 34 bytes (with 4 SACK blocks).
|
||||
* At minimum we need 10 bytes (to generate 1 SACK block). If both
|
||||
* TCP Timestamps (12 bytes) and TCP Signatures (18 bytes) are present,
|
||||
* we only have 10 bytes for SACK options (40 - (12 + 18)).
|
||||
*/
|
||||
int
|
||||
tcp_addoptions(struct tcpopt *to, u_char *optp)
|
||||
{
|
||||
u_int mask, optlen = 0;
|
||||
|
||||
for (mask = 1; mask < TOF_MAXOPT; mask <<= 1) {
|
||||
if ((to->to_flags & mask) != mask)
|
||||
continue;
|
||||
switch (to->to_flags & mask) {
|
||||
case TOF_MSS:
|
||||
while (optlen % 4) {
|
||||
optlen += TCPOLEN_NOP;
|
||||
*optp++ = TCPOPT_NOP;
|
||||
}
|
||||
optlen += TCPOLEN_MAXSEG;
|
||||
*optp++ = TCPOPT_MAXSEG;
|
||||
*optp++ = TCPOLEN_MAXSEG;
|
||||
to->to_mss = htons(to->to_mss);
|
||||
bcopy((u_char *)&to->to_mss, optp, sizeof(to->to_mss));
|
||||
optp += sizeof(to->to_mss);
|
||||
break;
|
||||
case TOF_SCALE:
|
||||
while (!optlen || optlen % 2 != 1) {
|
||||
optlen += TCPOLEN_NOP;
|
||||
*optp++ = TCPOPT_NOP;
|
||||
}
|
||||
optlen += TCPOLEN_WINDOW;
|
||||
*optp++ = TCPOPT_WINDOW;
|
||||
*optp++ = TCPOLEN_WINDOW;
|
||||
*optp++ = to->to_wscale;
|
||||
break;
|
||||
case TOF_SACKPERM:
|
||||
while (optlen % 2) {
|
||||
optlen += TCPOLEN_NOP;
|
||||
*optp++ = TCPOPT_NOP;
|
||||
}
|
||||
optlen += TCPOLEN_SACK_PERMITTED;
|
||||
*optp++ = TCPOPT_SACK_PERMITTED;
|
||||
*optp++ = TCPOLEN_SACK_PERMITTED;
|
||||
break;
|
||||
case TOF_TS:
|
||||
while (!optlen || optlen % 4 != 2) {
|
||||
optlen += TCPOLEN_NOP;
|
||||
*optp++ = TCPOPT_NOP;
|
||||
}
|
||||
optlen += TCPOLEN_TIMESTAMP;
|
||||
*optp++ = TCPOPT_TIMESTAMP;
|
||||
*optp++ = TCPOLEN_TIMESTAMP;
|
||||
to->to_tsval = htonl(to->to_tsval);
|
||||
to->to_tsecr = htonl(to->to_tsecr);
|
||||
bcopy((u_char *)&to->to_tsval, optp, sizeof(to->to_tsval));
|
||||
optp += sizeof(to->to_tsval);
|
||||
bcopy((u_char *)&to->to_tsecr, optp, sizeof(to->to_tsecr));
|
||||
optp += sizeof(to->to_tsecr);
|
||||
break;
|
||||
case TOF_SIGNATURE:
|
||||
{
|
||||
int siglen = TCPOLEN_SIGNATURE - 2;
|
||||
|
||||
while (!optlen || optlen % 4 != 2) {
|
||||
optlen += TCPOLEN_NOP;
|
||||
*optp++ = TCPOPT_NOP;
|
||||
}
|
||||
if (MAX_TCPOPTLEN - optlen < TCPOLEN_SIGNATURE)
|
||||
continue;
|
||||
optlen += TCPOLEN_SIGNATURE;
|
||||
*optp++ = TCPOPT_SIGNATURE;
|
||||
*optp++ = TCPOLEN_SIGNATURE;
|
||||
to->to_signature = optp;
|
||||
while (siglen--)
|
||||
*optp++ = 0;
|
||||
break;
|
||||
}
|
||||
case TOF_SACK:
|
||||
{
|
||||
int sackblks = 0;
|
||||
struct sackblk *sack = (struct sackblk *)to->to_sacks;
|
||||
tcp_seq sack_seq;
|
||||
|
||||
while (!optlen || optlen % 4 != 2) {
|
||||
optlen += TCPOLEN_NOP;
|
||||
*optp++ = TCPOPT_NOP;
|
||||
}
|
||||
if (MAX_TCPOPTLEN - optlen < 2 + TCPOLEN_SACK)
|
||||
continue;
|
||||
optlen += TCPOLEN_SACKHDR;
|
||||
*optp++ = TCPOPT_SACK;
|
||||
sackblks = min(to->to_nsacks,
|
||||
(MAX_TCPOPTLEN - optlen) / TCPOLEN_SACK);
|
||||
*optp++ = TCPOLEN_SACKHDR + sackblks * TCPOLEN_SACK;
|
||||
while (sackblks--) {
|
||||
sack_seq = htonl(sack->start);
|
||||
bcopy((u_char *)&sack_seq, optp, sizeof(sack_seq));
|
||||
optp += sizeof(sack_seq);
|
||||
sack_seq = htonl(sack->end);
|
||||
bcopy((u_char *)&sack_seq, optp, sizeof(sack_seq));
|
||||
optp += sizeof(sack_seq);
|
||||
optlen += TCPOLEN_SACK;
|
||||
sack++;
|
||||
}
|
||||
tcpstat.tcps_sack_send_blocks++;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
panic("%s: unknown TCP option type", __func__);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Terminate and pad TCP options to a 4 byte boundary. */
|
||||
if (optlen % 4) {
|
||||
optlen += TCPOLEN_EOL;
|
||||
*optp++ = TCPOPT_EOL;
|
||||
}
|
||||
while (optlen % 4) {
|
||||
optlen += TCPOLEN_NOP;
|
||||
*optp++ = TCPOPT_NOP;
|
||||
}
|
||||
|
||||
KASSERT(optlen <= MAX_TCPOPTLEN, ("%s: TCP options too long", __func__));
|
||||
return (optlen);
|
||||
}
|
||||
|
@ -1146,7 +1146,7 @@ tcp_input(m, off0)
|
||||
if ((to.to_flags & TOF_SCALE) &&
|
||||
(tp->t_flags & TF_REQ_SCALE)) {
|
||||
tp->t_flags |= TF_RCVD_SCALE;
|
||||
tp->snd_scale = to.to_requested_s_scale;
|
||||
tp->snd_scale = to.to_wscale;
|
||||
tp->snd_wnd = th->th_win << tp->snd_scale;
|
||||
tiwin = tp->snd_wnd;
|
||||
}
|
||||
@ -2745,7 +2745,7 @@ tcp_dooptions(to, cp, cnt, flags)
|
||||
if (!(flags & TO_SYN))
|
||||
continue;
|
||||
to->to_flags |= TOF_SCALE;
|
||||
to->to_requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
|
||||
to->to_wscale = min(cp[2], TCP_MAX_WINSHIFT);
|
||||
break;
|
||||
case TCPOPT_TIMESTAMP:
|
||||
if (optlen != TCPOLEN_TIMESTAMP)
|
||||
|
@ -1020,12 +1020,15 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
|
||||
* with auto sizing. This allows us to scale the
|
||||
* receive buffer over a wide range while not losing
|
||||
* any efficiency or fine granularity.
|
||||
*
|
||||
* RFC1323: The Window field in a SYN (i.e., a <SYN>
|
||||
* or <SYN,ACK>) segment itself is never scaled.
|
||||
*/
|
||||
while (wscale < TCP_MAX_WINSHIFT &&
|
||||
(0x1 << wscale) < tcp_minmss)
|
||||
wscale++;
|
||||
sc->sc_requested_r_scale = wscale;
|
||||
sc->sc_requested_s_scale = to->to_requested_s_scale;
|
||||
sc->sc_requested_s_scale = to->to_wscale;
|
||||
sc->sc_flags |= SCF_WINSCALE;
|
||||
}
|
||||
}
|
||||
@ -1097,8 +1100,8 @@ syncache_respond(struct syncache *sc, struct mbuf *m)
|
||||
struct ip *ip = NULL;
|
||||
struct tcphdr *th;
|
||||
int optlen, error;
|
||||
u_int16_t tlen, hlen, mssopt;
|
||||
u_int8_t *optp;
|
||||
u_int16_t hlen, tlen, mssopt;
|
||||
struct tcpopt to;
|
||||
#ifdef INET6
|
||||
struct ip6_hdr *ip6 = NULL;
|
||||
#endif
|
||||
@ -1108,33 +1111,16 @@ syncache_respond(struct syncache *sc, struct mbuf *m)
|
||||
(sc->sc_inc.inc_isipv6) ? sizeof(struct ip6_hdr) :
|
||||
#endif
|
||||
sizeof(struct ip);
|
||||
tlen = hlen + sizeof(struct tcphdr);
|
||||
|
||||
/* Determine MSS we advertize to other end of connection. */
|
||||
mssopt = tcp_mssopt(&sc->sc_inc);
|
||||
if (sc->sc_peer_mss)
|
||||
mssopt = max( min(sc->sc_peer_mss, mssopt), tcp_minmss);
|
||||
|
||||
/* Compute the size of the TCP options. */
|
||||
if (sc->sc_flags & SCF_NOOPT) {
|
||||
optlen = 0;
|
||||
} else {
|
||||
optlen = TCPOLEN_MAXSEG +
|
||||
((sc->sc_flags & SCF_WINSCALE) ? 4 : 0) +
|
||||
((sc->sc_flags & SCF_TIMESTAMP) ? TCPOLEN_TSTAMP_APPA : 0);
|
||||
#ifdef TCP_SIGNATURE
|
||||
if (sc->sc_flags & SCF_SIGNATURE)
|
||||
optlen += TCPOLEN_SIGNATURE;
|
||||
#endif
|
||||
if (sc->sc_flags & SCF_SACK)
|
||||
optlen += TCPOLEN_SACK_PERMITTED;
|
||||
optlen = roundup2(optlen, 4);
|
||||
}
|
||||
tlen = hlen + sizeof(struct tcphdr) + optlen;
|
||||
|
||||
/*
|
||||
* XXX: Assume that the entire packet will fit in a header mbuf.
|
||||
*/
|
||||
KASSERT(max_linkhdr + tlen <= MHLEN, ("syncache: mbuf too small"));
|
||||
/* XXX: Assume that the entire packet will fit in a header mbuf. */
|
||||
KASSERT(max_linkhdr + tlen + MAX_TCPOPTLEN <= MHLEN,
|
||||
("syncache: mbuf too small"));
|
||||
|
||||
/* Create the IP+TCP header from scratch. */
|
||||
if (m)
|
||||
@ -1197,70 +1183,52 @@ syncache_respond(struct syncache *sc, struct mbuf *m)
|
||||
|
||||
th->th_seq = htonl(sc->sc_iss);
|
||||
th->th_ack = htonl(sc->sc_irs + 1);
|
||||
th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
|
||||
th->th_off = sizeof(struct tcphdr) >> 2;
|
||||
th->th_x2 = 0;
|
||||
th->th_flags = TH_SYN|TH_ACK;
|
||||
th->th_win = htons(sc->sc_wnd);
|
||||
th->th_urp = 0;
|
||||
|
||||
/* Tack on the TCP options. */
|
||||
if (optlen != 0) {
|
||||
optp = (u_int8_t *)(th + 1);
|
||||
*optp++ = TCPOPT_MAXSEG;
|
||||
*optp++ = TCPOLEN_MAXSEG;
|
||||
*optp++ = (mssopt >> 8) & 0xff;
|
||||
*optp++ = mssopt & 0xff;
|
||||
if ((sc->sc_flags & SCF_NOOPT) == 0) {
|
||||
to.to_flags = 0;
|
||||
|
||||
to.to_mss = mssopt;
|
||||
to.to_flags = TOF_MSS;
|
||||
if (sc->sc_flags & SCF_WINSCALE) {
|
||||
*((u_int32_t *)optp) = htonl(TCPOPT_NOP << 24 |
|
||||
TCPOPT_WINDOW << 16 | TCPOLEN_WINDOW << 8 |
|
||||
sc->sc_requested_r_scale);
|
||||
optp += 4;
|
||||
to.to_wscale = sc->sc_requested_r_scale;
|
||||
to.to_flags |= TOF_SCALE;
|
||||
}
|
||||
|
||||
if (sc->sc_flags & SCF_TIMESTAMP) {
|
||||
u_int32_t *lp = (u_int32_t *)(optp);
|
||||
|
||||
/* Form timestamp option per appendix A of RFC 1323. */
|
||||
*lp++ = htonl(TCPOPT_TSTAMP_HDR);
|
||||
if (sc->sc_ts)
|
||||
*lp++ = htonl(sc->sc_ts);
|
||||
else
|
||||
*lp++ = htonl(ticks);
|
||||
*lp = htonl(sc->sc_tsreflect);
|
||||
optp += TCPOLEN_TSTAMP_APPA;
|
||||
/* Virgin timestamp or TCP cookie enhanced one. */
|
||||
to.to_tsval = sc->sc_ts ? sc->sc_ts : ticks;
|
||||
to.to_tsecr = sc->sc_tsreflect;
|
||||
to.to_flags |= TOF_TS;
|
||||
}
|
||||
if (sc->sc_flags & SCF_SACK)
|
||||
to.to_flags |= TOF_SACKPERM;
|
||||
#ifdef TCP_SIGNATURE
|
||||
if (sc->sc_flags & SCF_SIGNATURE)
|
||||
to.to_flags |= TOF_SIGNATURE;
|
||||
#endif
|
||||
optlen = tcp_addoptions(&to, (u_char *)(th + 1));
|
||||
|
||||
#ifdef TCP_SIGNATURE
|
||||
/*
|
||||
* Handle TCP-MD5 passive opener response.
|
||||
*/
|
||||
if (sc->sc_flags & SCF_SIGNATURE) {
|
||||
u_int8_t *bp = optp;
|
||||
int i;
|
||||
tcp_signature_compute(m, sizeof(struct ip), 0, optlen,
|
||||
to.to_signature, IPSEC_DIR_OUTBOUND);
|
||||
#endif
|
||||
|
||||
*bp++ = TCPOPT_SIGNATURE;
|
||||
*bp++ = TCPOLEN_SIGNATURE;
|
||||
for (i = 0; i < TCP_SIGLEN; i++)
|
||||
*bp++ = 0;
|
||||
tcp_signature_compute(m, sizeof(struct ip), 0, optlen,
|
||||
optp + 2, IPSEC_DIR_OUTBOUND);
|
||||
optp += TCPOLEN_SIGNATURE;
|
||||
}
|
||||
#endif /* TCP_SIGNATURE */
|
||||
|
||||
if (sc->sc_flags & SCF_SACK) {
|
||||
*optp++ = TCPOPT_SACK_PERMITTED;
|
||||
*optp++ = TCPOLEN_SACK_PERMITTED;
|
||||
}
|
||||
|
||||
{
|
||||
/* Pad TCP options to a 4 byte boundary */
|
||||
int padlen = optlen - (optp - (u_int8_t *)(th + 1));
|
||||
while (padlen-- > 0)
|
||||
*optp++ = TCPOPT_EOL;
|
||||
}
|
||||
}
|
||||
/* Adjust headers by option size. */
|
||||
th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
|
||||
m->m_len += optlen;
|
||||
m->m_pkthdr.len += optlen;
|
||||
#ifdef INET6
|
||||
if (sc->sc_inc.inc_isipv6)
|
||||
ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) + optlen);
|
||||
#endif
|
||||
ip->ip_len += optlen;
|
||||
} else
|
||||
optlen = 0;
|
||||
|
||||
#ifdef INET6
|
||||
if (sc->sc_inc.inc_isipv6) {
|
||||
@ -1272,7 +1240,7 @@ syncache_respond(struct syncache *sc, struct mbuf *m)
|
||||
#endif
|
||||
{
|
||||
th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
|
||||
htons(tlen - hlen + IPPROTO_TCP));
|
||||
htons(tlen + optlen - hlen + IPPROTO_TCP));
|
||||
m->m_pkthdr.csum_flags = CSUM_TCP;
|
||||
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
|
||||
error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL);
|
||||
|
@ -229,22 +229,27 @@ struct tcpcb {
|
||||
* Structure to hold TCP options that are only used during segment
|
||||
* processing (in tcp_input), but not held in the tcpcb.
|
||||
* It's basically used to reduce the number of parameters
|
||||
* to tcp_dooptions.
|
||||
* to tcp_dooptions and tcp_addoptions.
|
||||
* The binary order of the to_flags is relevant for packing of the
|
||||
* options in tcp_addoptions.
|
||||
*/
|
||||
struct tcpopt {
|
||||
u_long to_flags; /* which options are present */
|
||||
#define TOF_TS 0x0001 /* timestamp */
|
||||
#define TOF_MSS 0x0010 /* maximum segment size */
|
||||
#define TOF_SCALE 0x0020 /* window scaling */
|
||||
#define TOF_MSS 0x0001 /* maximum segment size */
|
||||
#define TOF_SCALE 0x0002 /* window scaling */
|
||||
#define TOF_SACKPERM 0x0004 /* SACK permitted */
|
||||
#define TOF_TS 0x0010 /* timestamp */
|
||||
#define TOF_SIGNATURE 0x0040 /* signature option present */
|
||||
#define TOF_SIGLEN 0x0080 /* signature length valid (RFC2385) */
|
||||
#define TOF_SACK 0x0100 /* Peer sent SACK option */
|
||||
u_int32_t to_tsval;
|
||||
u_int32_t to_tsecr;
|
||||
u_int16_t to_mss;
|
||||
u_int8_t to_requested_s_scale;
|
||||
#define TOF_MAXOPT 0x0200
|
||||
u_int32_t to_tsval; /* our new timestamp */
|
||||
u_int32_t to_tsecr; /* reflected timestamp */
|
||||
u_int16_t to_mss; /* maximum segment size */
|
||||
u_int8_t to_wscale; /* window scaling */
|
||||
u_int8_t to_nsacks; /* number of SACK blocks */
|
||||
u_char *to_sacks; /* pointer to the first SACK blocks */
|
||||
u_char *to_signature; /* pointer to the MD5 signature */
|
||||
};
|
||||
|
||||
/*
|
||||
@ -497,6 +502,7 @@ extern int ss_fltsz_local;
|
||||
|
||||
extern int tcp_do_sack; /* SACK enabled/disabled */
|
||||
|
||||
int tcp_addoptions(struct tcpopt *, u_char *);
|
||||
struct tcpcb *
|
||||
tcp_close(struct tcpcb *);
|
||||
void tcp_discardcb(struct tcpcb *);
|
||||
|
Loading…
Reference in New Issue
Block a user