Pass the number of segments coalesced by LRO up the stack by repurposing the
tso_segsz pkthdr field during RX processing, and use the information in TCP
for more correct accounting and as a congestion control input.

This is only a start, and an audit of other uses for the data is left as
future work.

Reviewed by: gallatin, rrs
Sponsored by: Netflix, Inc.
Differential Revision: https://reviews.freebsd.org/D7564
This commit is contained in:
parent
eb27c4c0e9
commit
4b7b743c16
@ -86,6 +86,7 @@ struct cc_var {
|
||||
struct tcpcb *tcp;
|
||||
struct sctp_nets *sctp;
|
||||
} ccvc;
|
||||
uint16_t nsegs; /* # segments coalesced into current chain. */
|
||||
};
|
||||
|
||||
/* cc_var flags. */
|
||||
|
@ -137,7 +137,8 @@ newreno_ack_received(struct cc_var *ccv, uint16_t type)
|
||||
*/
|
||||
if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max))
|
||||
incr = min(ccv->bytes_this_ack,
|
||||
V_tcp_abc_l_var * CCV(ccv, t_maxseg));
|
||||
ccv->nsegs * V_tcp_abc_l_var *
|
||||
CCV(ccv, t_maxseg));
|
||||
else
|
||||
incr = min(ccv->bytes_this_ack, CCV(ccv, t_maxseg));
|
||||
}
|
||||
|
@ -300,10 +300,12 @@ hhook_run_tcp_est_in(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to)
|
||||
* CC wrapper hook functions
|
||||
*/
|
||||
void
|
||||
cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t type)
|
||||
cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t nsegs,
|
||||
uint16_t type)
|
||||
{
|
||||
INP_WLOCK_ASSERT(tp->t_inpcb);
|
||||
|
||||
tp->ccv->nsegs = nsegs;
|
||||
tp->ccv->bytes_this_ack = BYTES_THIS_ACK(tp, th);
|
||||
if (tp->snd_cwnd <= tp->snd_wnd)
|
||||
tp->ccv->flags |= CCF_CWND_LIMITED;
|
||||
@ -313,7 +315,7 @@ cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t type)
|
||||
if (type == CC_ACK) {
|
||||
if (tp->snd_cwnd > tp->snd_ssthresh) {
|
||||
tp->t_bytes_acked += min(tp->ccv->bytes_this_ack,
|
||||
V_tcp_abc_l_var * tcp_maxseg(tp));
|
||||
nsegs * V_tcp_abc_l_var * tcp_maxseg(tp));
|
||||
if (tp->t_bytes_acked >= tp->snd_cwnd) {
|
||||
tp->t_bytes_acked -= tp->snd_cwnd;
|
||||
tp->ccv->flags |= CCF_ABC_SENTAWND;
|
||||
@ -1502,6 +1504,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
int thflags, acked, ourfinisacked, needoutput = 0, sack_changed;
|
||||
int rstreason, todrop, win;
|
||||
u_long tiwin;
|
||||
uint16_t nsegs;
|
||||
char *s;
|
||||
struct in_conninfo *inc;
|
||||
struct mbuf *mfree;
|
||||
@ -1521,6 +1524,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
inc = &tp->t_inpcb->inp_inc;
|
||||
tp->sackhint.last_sack_ack = 0;
|
||||
sack_changed = 0;
|
||||
nsegs = max(1, m->m_pkthdr.lro_nsegs);
|
||||
|
||||
/*
|
||||
* If this is either a state-changing packet or current state isn't
|
||||
@ -1759,7 +1763,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
/* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */
|
||||
hhook_run_tcp_est_in(tp, th, &to);
|
||||
|
||||
TCPSTAT_INC(tcps_rcvackpack);
|
||||
TCPSTAT_ADD(tcps_rcvackpack, nsegs);
|
||||
TCPSTAT_ADD(tcps_rcvackbyte, acked);
|
||||
sbdrop(&so->so_snd, acked);
|
||||
if (SEQ_GT(tp->snd_una, tp->snd_recover) &&
|
||||
@ -1772,7 +1776,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
* typically means increasing the congestion
|
||||
* window.
|
||||
*/
|
||||
cc_ack_received(tp, th, CC_ACK);
|
||||
cc_ack_received(tp, th, nsegs, CC_ACK);
|
||||
|
||||
tp->snd_una = th->th_ack;
|
||||
/*
|
||||
@ -1838,7 +1842,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
* rcv_nxt.
|
||||
*/
|
||||
tp->rcv_up = tp->rcv_nxt;
|
||||
TCPSTAT_INC(tcps_rcvpack);
|
||||
TCPSTAT_ADD(tcps_rcvpack, nsegs);
|
||||
TCPSTAT_ADD(tcps_rcvbyte, tlen);
|
||||
#ifdef TCPDEBUG
|
||||
if (so->so_options & SO_DEBUG)
|
||||
@ -2570,7 +2574,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
tp->t_dupacks = 0;
|
||||
else if (++tp->t_dupacks > tcprexmtthresh ||
|
||||
IN_FASTRECOVERY(tp->t_flags)) {
|
||||
cc_ack_received(tp, th, CC_DUPACK);
|
||||
cc_ack_received(tp, th, nsegs,
|
||||
CC_DUPACK);
|
||||
if ((tp->t_flags & TF_SACK_PERMIT) &&
|
||||
IN_FASTRECOVERY(tp->t_flags)) {
|
||||
int awnd;
|
||||
@ -2620,7 +2625,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
}
|
||||
/* Congestion signal before ack. */
|
||||
cc_cong_signal(tp, th, CC_NDUPACK);
|
||||
cc_ack_received(tp, th, CC_DUPACK);
|
||||
cc_ack_received(tp, th, nsegs,
|
||||
CC_DUPACK);
|
||||
tcp_timer_activate(tp, TT_REXMT, 0);
|
||||
tp->t_rtttime = 0;
|
||||
if (tp->t_flags & TF_SACK_PERMIT) {
|
||||
@ -2654,7 +2660,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
* segment. Restore the original
|
||||
* snd_cwnd after packet transmission.
|
||||
*/
|
||||
cc_ack_received(tp, th, CC_DUPACK);
|
||||
cc_ack_received(tp, th, nsegs,
|
||||
CC_DUPACK);
|
||||
u_long oldcwnd = tp->snd_cwnd;
|
||||
tcp_seq oldsndmax = tp->snd_max;
|
||||
u_int sent;
|
||||
@ -2756,7 +2763,7 @@ process_ACK:
|
||||
KASSERT(acked >= 0, ("%s: acked unexepectedly negative "
|
||||
"(tp->snd_una=%u, th->th_ack=%u, tp=%p, m=%p)", __func__,
|
||||
tp->snd_una, th->th_ack, tp, m));
|
||||
TCPSTAT_INC(tcps_rcvackpack);
|
||||
TCPSTAT_ADD(tcps_rcvackpack, nsegs);
|
||||
TCPSTAT_ADD(tcps_rcvackbyte, acked);
|
||||
|
||||
/*
|
||||
@ -2821,7 +2828,7 @@ process_ACK:
|
||||
* control related information. This typically means increasing
|
||||
* the congestion window.
|
||||
*/
|
||||
cc_ack_received(tp, th, CC_ACK);
|
||||
cc_ack_received(tp, th, nsegs, CC_ACK);
|
||||
|
||||
SOCKBUF_LOCK(&so->so_snd);
|
||||
if (acked > sbavail(&so->so_snd)) {
|
||||
|
@ -392,6 +392,7 @@ tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le)
|
||||
#endif
|
||||
}
|
||||
|
||||
le->m_head->m_pkthdr.lro_nsegs = le->append_cnt + 1;
|
||||
(*lc->ifp->if_input)(lc->ifp, le->m_head);
|
||||
lc->lro_queued += le->append_cnt + 1;
|
||||
lc->lro_flushed++;
|
||||
|
@ -784,7 +784,7 @@ void tcp_pulloutofband(struct socket *,
|
||||
void tcp_xmit_timer(struct tcpcb *, int);
|
||||
void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
|
||||
void cc_ack_received(struct tcpcb *tp, struct tcphdr *th,
|
||||
uint16_t type);
|
||||
uint16_t nsegs, uint16_t type);
|
||||
void cc_conn_init(struct tcpcb *tp);
|
||||
void cc_post_recovery(struct tcpcb *tp, struct tcphdr *th);
|
||||
void cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type);
|
||||
|
@ -174,6 +174,7 @@ struct pkthdr {
|
||||
#define PH_vt PH_per
|
||||
#define vt_nrecs sixteen[0]
|
||||
#define tso_segsz PH_per.sixteen[1]
|
||||
#define lro_nsegs tso_segsz
|
||||
#define csum_phsum PH_per.sixteen[2]
|
||||
#define csum_data PH_per.thirtytwo[1]
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user