Separate out send buffer autoscaling code into a function, so that
alternative TCP stacks may reuse it instead of pasting.

Obtained from:	Netflix
Gleb Smirnoff 2017-12-07 22:36:58 +00:00
parent 9f0abda051
commit 66492fea49
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=326673
2 changed files with 61 additions and 53 deletions

sys/netinet/tcp_output.c

@@ -487,59 +487,7 @@ tcp_output(struct tcpcb *tp)
 	/* len will be >= 0 after this point. */
 	KASSERT(len >= 0, ("[%s:%d]: len < 0", __func__, __LINE__));
-	/*
-	 * Automatic sizing of send socket buffer. Often the send buffer
-	 * size is not optimally adjusted to the actual network conditions
-	 * at hand (delay bandwidth product). Setting the buffer size too
-	 * small limits throughput on links with high bandwidth and high
-	 * delay (e.g. trans-continental/oceanic links). Setting the
-	 * buffer size too big consumes too much real kernel memory,
-	 * especially with many connections on busy servers.
-	 *
-	 * The criteria to step up the send buffer one notch are:
-	 *  1. receive window of remote host is larger than send buffer
-	 *     (with a fudge factor of 5/4th);
-	 *  2. send buffer is filled to 7/8th with data (so we actually
-	 *     have data to make use of it);
-	 *  3. send buffer fill has not hit maximal automatic size;
-	 *  4. our send window (slow start and congestion controlled) is
-	 *     larger than sent but unacknowledged data in send buffer.
-	 *
-	 * The remote host receive window scaling factor may limit the
-	 * growing of the send buffer before it reaches its allowed
-	 * maximum.
-	 *
-	 * It scales directly with slow start or congestion window
-	 * and does at most one step per received ACK. This fast
-	 * scaling has the drawback of growing the send buffer beyond
-	 * what is strictly necessary to make full use of a given
-	 * delay*bandwidth product. However testing has shown this not
-	 * to be much of a problem. At worst we are trading wasting
-	 * of available bandwidth (the non-use of it) for wasting some
-	 * socket buffer memory.
-	 *
-	 * TODO: Shrink send buffer during idle periods together
-	 * with congestion window. Requires another timer. Has to
-	 * wait for upcoming tcp timer rewrite.
-	 *
-	 * XXXGL: should sbused() or sbavail() be used here?
-	 */
-	if (V_tcp_do_autosndbuf && so->so_snd.sb_flags & SB_AUTOSIZE) {
-		int lowat;
-		lowat = V_tcp_sendbuf_auto_lowat ? so->so_snd.sb_lowat : 0;
-		if ((tp->snd_wnd / 4 * 5) >= so->so_snd.sb_hiwat - lowat &&
-		    sbused(&so->so_snd) >=
-		    (so->so_snd.sb_hiwat / 8 * 7) - lowat &&
-		    sbused(&so->so_snd) < V_tcp_autosndbuf_max &&
-		    sendwin >= (sbused(&so->so_snd) -
-		    (tp->snd_nxt - tp->snd_una))) {
-			if (!sbreserve_locked(&so->so_snd,
-			    min(so->so_snd.sb_hiwat + V_tcp_autosndbuf_inc,
-			    V_tcp_autosndbuf_max), so, curthread))
-				so->so_snd.sb_flags &= ~SB_AUTOSIZE;
-		}
-	}
+	tcp_sndbuf_autoscale(tp, so, sendwin);
 	/*
 	 * Decide if we can use TCP Segmentation Offloading (if supported by
@@ -1858,3 +1806,62 @@ tcp_addoptions(struct tcpopt *to, u_char *optp)
 	KASSERT(optlen <= TCP_MAXOLEN, ("%s: TCP options too long", __func__));
 	return (optlen);
 }
+
+void
+tcp_sndbuf_autoscale(struct tcpcb *tp, struct socket *so, uint32_t sendwin)
+{
+	/*
+	 * Automatic sizing of send socket buffer. Often the send buffer
+	 * size is not optimally adjusted to the actual network conditions
+	 * at hand (delay bandwidth product). Setting the buffer size too
+	 * small limits throughput on links with high bandwidth and high
+	 * delay (e.g. trans-continental/oceanic links). Setting the
+	 * buffer size too big consumes too much real kernel memory,
+	 * especially with many connections on busy servers.
+	 *
+	 * The criteria to step up the send buffer one notch are:
+	 *  1. receive window of remote host is larger than send buffer
+	 *     (with a fudge factor of 5/4th);
+	 *  2. send buffer is filled to 7/8th with data (so we actually
+	 *     have data to make use of it);
+	 *  3. send buffer fill has not hit maximal automatic size;
+	 *  4. our send window (slow start and congestion controlled) is
+	 *     larger than sent but unacknowledged data in send buffer.
+	 *
+	 * The remote host receive window scaling factor may limit the
+	 * growing of the send buffer before it reaches its allowed
+	 * maximum.
+	 *
+	 * It scales directly with slow start or congestion window
+	 * and does at most one step per received ACK. This fast
+	 * scaling has the drawback of growing the send buffer beyond
+	 * what is strictly necessary to make full use of a given
+	 * delay*bandwidth product. However testing has shown this not
+	 * to be much of a problem. At worst we are trading wasting
+	 * of available bandwidth (the non-use of it) for wasting some
+	 * socket buffer memory.
+	 *
+	 * TODO: Shrink send buffer during idle periods together
+	 * with congestion window. Requires another timer. Has to
+	 * wait for upcoming tcp timer rewrite.
+	 *
+	 * XXXGL: should sbused() or sbavail() be used here?
+	 */
+	if (V_tcp_do_autosndbuf && so->so_snd.sb_flags & SB_AUTOSIZE) {
+		int lowat;
+		lowat = V_tcp_sendbuf_auto_lowat ? so->so_snd.sb_lowat : 0;
+		if ((tp->snd_wnd / 4 * 5) >= so->so_snd.sb_hiwat - lowat &&
+		    sbused(&so->so_snd) >=
+		    (so->so_snd.sb_hiwat / 8 * 7) - lowat &&
+		    sbused(&so->so_snd) < V_tcp_autosndbuf_max &&
+		    sendwin >= (sbused(&so->so_snd) -
+		    (tp->snd_nxt - tp->snd_una))) {
+			if (!sbreserve_locked(&so->so_snd,
+			    min(so->so_snd.sb_hiwat + V_tcp_autosndbuf_inc,
+			    V_tcp_autosndbuf_max), so, curthread))
+				so->so_snd.sb_flags &= ~SB_AUTOSIZE;
+		}
+	}
+}
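
To make the step-up arithmetic above concrete, here is a minimal userland sketch of the same four criteria on plain integers. Everything in it (the sndbuf_state struct, should_grow_sndbuf(), the sample numbers) is invented for illustration and assumes lowat == 0; only the predicate itself mirrors what tcp_sndbuf_autoscale() checks.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the kernel's tcpcb/sockbuf fields. */
struct sndbuf_state {
	uint32_t snd_wnd;	/* peer's advertised receive window */
	uint32_t sb_hiwat;	/* current send buffer size */
	uint32_t sb_used;	/* bytes sitting in the send buffer */
	uint32_t unacked;	/* snd_nxt - snd_una */
	uint32_t sendwin;	/* effective send window */
};

/*
 * Same four criteria as tcp_sndbuf_autoscale(), with lowat == 0.
 * Assumes sb_used >= unacked (queued data covers the in-flight bytes).
 */
static int
should_grow_sndbuf(const struct sndbuf_state *s, uint32_t autosndbuf_max)
{
	return (s->snd_wnd / 4 * 5 >= s->sb_hiwat &&	/* 1: rwnd > 5/4 buf */
	    s->sb_used >= s->sb_hiwat / 8 * 7 &&	/* 2: buffer 7/8 full */
	    s->sb_used < autosndbuf_max &&		/* 3: below auto max */
	    s->sendwin >= s->sb_used - s->unacked);	/* 4: window covers queue */
}

int
main(void)
{
	struct sndbuf_state s = {
		.snd_wnd = 65536, .sb_hiwat = 65536,
		.sb_used = 60000, .unacked = 20000, .sendwin = 65536,
	};

	/* 81920 >= 65536, 60000 >= 57344, 60000 < 2MB, 65536 >= 40000: grows. */
	printf("grow: %d\n", should_grow_sndbuf(&s, 2 * 1024 * 1024));
	return (0);
}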

sys/netinet/tcp_var.h

@@ -884,6 +884,7 @@ void	tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
 void	tcp_free_sackholes(struct tcpcb *tp);
 int	tcp_newreno(struct tcpcb *, struct tcphdr *);
 int	tcp_compute_pipe(struct tcpcb *);
+void	tcp_sndbuf_autoscale(struct tcpcb *, struct socket *, uint32_t);
 
 static inline void
 tcp_fields_to_host(struct tcphdr *th)
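
With the prototype now visible to other compilation units, an alternative TCP stack's output routine can call the helper instead of carrying a pasted copy. A rough sketch of such a caller follows; it is an outline rather than compilable code, tcp_alt_output() and its surrounding stack are hypothetical, and locking is elided. Only tcp_sndbuf_autoscale() comes from this commit.

/*
 * Hypothetical alternative-stack output path reusing the stock
 * send buffer autoscaling instead of a private copy.
 */
static int
tcp_alt_output(struct tcpcb *tp)
{
	struct socket *so = tp->t_inpcb->inp_socket;
	uint32_t sendwin;

	/* Send window: whatever the peer and congestion control allow. */
	sendwin = min(tp->snd_wnd, tp->snd_cwnd);

	/* Grow the send buffer, if warranted, before building segments. */
	tcp_sndbuf_autoscale(tp, so, sendwin);

	/* ... segment construction and transmission elided ... */
	return (0);
}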