tcp_hpts: rename input queue to drop queue and trim dead code

The HPTS input queue is in reality used only for "delayed drops".
When a TCP stack decides to drop a connection on the output path,
it can't do so directly because of the locking protocol between the
main tcp_output() and the stacks.  So, rack/bbr utilize HPTS to drop
the connection in a different context.

In the past the queue could also process input packets in the context
of the HPTS thread, but no stack uses this anymore, so remove this
functionality.

Reviewed by:		rrs
Differential revision:	https://reviews.freebsd.org/D33025
This commit is contained in:
Gleb Smirnoff 2021-12-02 10:48:48 -08:00
parent b0a7c008cb
commit f971e79139
8 changed files with 164 additions and 247 deletions

View File

@ -629,7 +629,7 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
* If using hpts, let's drop a random number in so
* not all new connections fall on the same CPU.
*/
inp->inp_hpts_cpu = inp->inp_input_cpu = hpts_random_cpu(inp);
inp->inp_hpts_cpu = inp->inp_dropq_cpu = hpts_random_cpu(inp);
#endif
refcount_init(&inp->inp_refcount, 1); /* Reference from socket. */
INP_WLOCK(inp);
@ -1760,7 +1760,7 @@ in_pcbrele_rlocked(struct inpcb *inp)
MPASS(inp->inp_flags & INP_FREED);
MPASS(inp->inp_socket == NULL);
MPASS(inp->inp_in_hpts == 0);
MPASS(inp->inp_in_input == 0);
MPASS(inp->inp_in_dropq == 0);
INP_RUNLOCK(inp);
uma_zfree_smr(inp->inp_pcbinfo->ipi_zone, inp);
return (true);
@ -1778,7 +1778,7 @@ in_pcbrele_wlocked(struct inpcb *inp)
MPASS(inp->inp_flags & INP_FREED);
MPASS(inp->inp_socket == NULL);
MPASS(inp->inp_in_hpts == 0);
MPASS(inp->inp_in_input == 0);
MPASS(inp->inp_in_dropq == 0);
INP_WUNLOCK(inp);
uma_zfree_smr(inp->inp_pcbinfo->ipi_zone, inp);
return (true);

View File

@ -234,22 +234,21 @@ struct inpcb {
* fields can *not* be collapsed into a single bit field.
*/
#if defined(__amd64__) || defined(__i386__)
volatile uint8_t inp_in_hpts; /* on output hpts (lock b) */
volatile uint8_t inp_in_input; /* on input hpts (lock b) */
uint8_t inp_in_hpts; /* on output hpts (lock b) */
uint8_t inp_in_dropq; /* on input hpts (lock b) */
#else
volatile uint32_t inp_in_hpts; /* on output hpts (lock b) */
volatile uint32_t inp_in_input; /* on input hpts (lock b) */
uint32_t inp_in_hpts; /* on output hpts (lock b) */
uint32_t inp_in_dropq; /* on input hpts (lock b) */
#endif
volatile uint16_t inp_hpts_cpu; /* Lock (i) */
volatile uint16_t inp_irq_cpu; /* Set by LRO in behalf of or the driver */
u_int inp_refcount; /* (i) refcount */
int inp_flags; /* (i) generic IP/datagram flags */
int inp_flags2; /* (i) generic IP/datagram flags #2*/
volatile uint16_t inp_input_cpu; /* Lock (i) */
volatile uint8_t inp_hpts_cpu_set :1, /* on output hpts (i) */
inp_input_cpu_set : 1, /* on input hpts (i) */
uint16_t inp_dropq_cpu; /* Lock (i) */
uint8_t inp_hpts_cpu_set :1, /* on output hpts (i) */
inp_dropq_cpu_set : 1, /* on input hpts (i) */
inp_hpts_calls :1, /* (i) from output hpts */
inp_input_calls :1, /* (i) from input hpts */
inp_irq_cpu_set :1, /* (i) from LRO/Driver */
inp_spare_bits2 : 3;
uint8_t inp_numa_domain; /* numa domain */
@ -257,7 +256,8 @@ struct inpcb {
struct socket *inp_socket; /* (i) back pointer to socket */
uint32_t inp_hptsslot; /* Hpts wheel slot this tcb is Lock(i&b) */
uint32_t inp_hpts_drop_reas; /* reason we are dropping the PCB (lock i&b) */
TAILQ_ENTRY(inpcb) inp_input; /* pacing in queue next lock(b) */
uint32_t inp_dropq_gencnt;
TAILQ_ENTRY(inpcb) inp_dropq; /* hpts drop queue next lock(b) */
struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */
struct ucred *inp_cred; /* (c) cache of socket cred */
u_int32_t inp_flow; /* (i) IPv6 flow information */

View File

@ -62,15 +62,7 @@ __FBSDID("$FreeBSD$");
* Of course this is a bare bones example and the stack will probably
* have more consideration than just the above.
*
* Now the second function (actually two functions I guess :D)
* the tcp_hpts system provides is the ability to either abort
* a connection (later) or process input on a connection.
* Why would you want to do this? To keep processor locality
* and or not have to worry about untangling any recursive
* locks. The input function now is hooked to the new LRO
* system as well.
*
* In order to use the input redirection function the
* In order to run input queued segments from the HPTS context the
* tcp stack must define an input function for
* tfb_do_queued_segments(). This function understands
* how to dequeue an array of packets that were input and
@ -109,6 +101,10 @@ __FBSDID("$FreeBSD$");
* you have defined the tfb_do_segment_nounlock() as
* described above.
*
* Now the second function the tcp_hpts system provides is the ability
* to abort a connection later. Why would you want to do this?
* To not have to worry about untangling any recursive locks.
*
* The second feature of the input side of hpts is the
* dropping of a connection. This is due to the way that
* locking may have occurred on the INP_WLOCK. So if
@ -202,6 +198,8 @@ __FBSDID("$FreeBSD$");
/* Each hpts has its own p_mtx which is used for locking */
#define HPTS_MTX_ASSERT(hpts) mtx_assert(&(hpts)->p_mtx, MA_OWNED)
#define HPTS_LOCK(hpts) mtx_lock(&(hpts)->p_mtx)
#define HPTS_UNLOCK(hpts) mtx_unlock(&(hpts)->p_mtx)
TAILQ_HEAD(hptsh, inpcb);
struct tcp_hpts_entry {
/* Cache line 0x00 */
@ -226,10 +224,11 @@ struct tcp_hpts_entry {
uint8_t p_fill[3]; /* Fill to 32 bits */
/* Cache line 0x40 */
void *p_inp;
struct hptsh p_input; /* For the tcp-input runner */
TAILQ_HEAD(, inpcb) p_dropq; /* Delayed drop queue */
/* Hptsi wheel */
struct hptsh *p_hptss;
int32_t p_on_inqueue_cnt; /* Count on input queue in this hpts */
uint32_t p_dropq_cnt; /* Count on drop queue */
uint32_t p_dropq_gencnt;
uint32_t p_hpts_sleep_time; /* Current sleep interval having a max
* of 255ms */
uint32_t overidden_sleep; /* what was overrided by min-sleep for logging */
@ -270,7 +269,6 @@ static int hpts_does_tp_logging = 0;
static int hpts_use_assigned_cpu = 1;
static int32_t hpts_uses_oldest = OLDEST_THRESHOLD;
static void tcp_input_data(struct tcp_hpts_entry *hpts, struct timeval *tv);
static int32_t tcp_hptsi(struct tcp_hpts_entry *hpts, int from_callout);
static void tcp_hpts_thread(void *ctx);
static void tcp_init_hptsi(void *st);
@ -558,41 +556,6 @@ hpts_sane_pace_insert(struct tcp_hpts_entry *hpts, struct inpcb *inp, struct hpt
}
}
static inline void
hpts_sane_input_remove(struct tcp_hpts_entry *hpts, struct inpcb *inp, int clear)
{
HPTS_MTX_ASSERT(hpts);
KASSERT(hpts->p_cpu == inp->inp_hpts_cpu,
("%s: hpts:%p inp:%p incorrect CPU", __FUNCTION__, hpts, inp));
KASSERT(inp->inp_in_input != 0,
("%s: hpts:%p inp:%p not on the input hpts?", __FUNCTION__, hpts, inp));
TAILQ_REMOVE(&hpts->p_input, inp, inp_input);
hpts->p_on_inqueue_cnt--;
KASSERT(hpts->p_on_inqueue_cnt >= 0,
("Hpts in goes negative inp:%p hpts:%p",
inp, hpts));
KASSERT((((TAILQ_EMPTY(&hpts->p_input) != 0) && (hpts->p_on_inqueue_cnt == 0)) ||
((TAILQ_EMPTY(&hpts->p_input) == 0) && (hpts->p_on_inqueue_cnt > 0))),
("%s hpts:%p input cnt (p_on_inqueue):%d and queue state mismatch",
__FUNCTION__, hpts, hpts->p_on_inqueue_cnt));
if (clear)
inp->inp_in_input = 0;
}
static inline void
hpts_sane_input_insert(struct tcp_hpts_entry *hpts, struct inpcb *inp, int line)
{
HPTS_MTX_ASSERT(hpts);
KASSERT(hpts->p_cpu == inp->inp_hpts_cpu,
("%s: hpts:%p inp:%p incorrect CPU", __FUNCTION__, hpts, inp));
KASSERT(inp->inp_in_input == 0,
("%s: hpts:%p inp:%p already on the input hpts?", __FUNCTION__, hpts, inp));
TAILQ_INSERT_TAIL(&hpts->p_input, inp, inp_input);
inp->inp_in_input = 1;
hpts->p_on_inqueue_cnt++;
in_pcbref(inp);
}
static struct tcp_hpts_entry *
tcp_hpts_lock(struct inpcb *inp)
{
@ -614,19 +577,19 @@ tcp_hpts_lock(struct inpcb *inp)
}
static struct tcp_hpts_entry *
tcp_input_lock(struct inpcb *inp)
tcp_dropq_lock(struct inpcb *inp)
{
struct tcp_hpts_entry *hpts;
int32_t hpts_num;
again:
hpts_num = inp->inp_input_cpu;
hpts_num = inp->inp_dropq_cpu;
hpts = tcp_pace.rp_ent[hpts_num];
KASSERT(mtx_owned(&hpts->p_mtx) == 0,
("Hpts:%p owns mtx prior-to lock line:%d",
hpts, __LINE__));
mtx_lock(&hpts->p_mtx);
if (hpts_num != inp->inp_input_cpu) {
if (hpts_num != inp->inp_dropq_cpu) {
mtx_unlock(&hpts->p_mtx);
goto again;
}
@ -652,13 +615,38 @@ tcp_hpts_remove_locked_output(struct tcp_hpts_entry *hpts, struct inpcb *inp, in
}
static void
tcp_hpts_remove_locked_input(struct tcp_hpts_entry *hpts, struct inpcb *inp, int32_t flags, int32_t line)
tcp_dropq_remove(struct tcp_hpts_entry *hpts, struct inpcb *inp)
{
bool released __diagused;
HPTS_MTX_ASSERT(hpts);
if (inp->inp_in_input) {
hpts_sane_input_remove(hpts, inp, 1);
tcp_remove_hpts_ref(inp, hpts, line);
INP_WLOCK_ASSERT(inp);
if (inp->inp_in_dropq != IHPTS_ONQUEUE)
return;
MPASS(hpts->p_cpu == inp->inp_dropq_cpu);
if (__predict_true(inp->inp_dropq_gencnt == hpts->p_dropq_gencnt)) {
TAILQ_REMOVE(&hpts->p_dropq, inp, inp_dropq);
MPASS(hpts->p_dropq_cnt > 0);
hpts->p_dropq_cnt--;
inp->inp_in_dropq = IHPTS_NONE;
released = in_pcbrele_wlocked(inp);
MPASS(released == false);
} else {
/*
* tcp_delayed_drop() now owns the TAILQ head of this inp.
* Can't TAILQ_REMOVE, just mark it.
*/
#ifdef INVARIANTS
struct inpcb *tmp;
TAILQ_FOREACH(tmp, &hpts->p_dropq, inp_dropq)
MPASS(tmp != inp);
#endif
inp->inp_in_dropq = IHPTS_MOVING;
}
}
/*
@ -669,7 +657,7 @@ tcp_hpts_remove_locked_input(struct tcp_hpts_entry *hpts, struct inpcb *inp, int
*
* Valid values in the flags are
* HPTS_REMOVE_OUTPUT - remove from the output of the hpts.
* HPTS_REMOVE_INPUT - remove from the input of the hpts.
* HPTS_REMOVE_DROPQ - remove from the drop queue of the hpts.
* Note that you can use one or both values together
* and get two actions.
*/
@ -684,9 +672,9 @@ __tcp_hpts_remove(struct inpcb *inp, int32_t flags, int32_t line)
tcp_hpts_remove_locked_output(hpts, inp, flags, line);
mtx_unlock(&hpts->p_mtx);
}
if (flags & HPTS_REMOVE_INPUT) {
hpts = tcp_input_lock(inp);
tcp_hpts_remove_locked_input(hpts, inp, flags, line);
if (flags & HPTS_REMOVE_DROPQ) {
hpts = tcp_dropq_lock(inp);
tcp_dropq_remove(hpts, inp);
mtx_unlock(&hpts->p_mtx);
}
}
@ -1097,31 +1085,29 @@ __tcp_hpts_insert(struct inpcb *inp, uint32_t slot, int32_t line){
}
void
__tcp_set_inp_to_drop(struct inpcb *inp, uint16_t reason, int32_t line)
tcp_set_inp_to_drop(struct inpcb *inp, uint16_t reason)
{
struct tcp_hpts_entry *hpts;
struct tcpcb *tp;
struct tcpcb *tp = intotcpcb(inp);
tp = intotcpcb(inp);
hpts = tcp_input_lock(tp->t_inpcb);
if (inp->inp_in_input == 0) {
/* Ok we need to set it on the hpts in the current slot */
hpts_sane_input_insert(hpts, inp, line);
if ((hpts->p_hpts_active == 0) &&
(hpts->p_on_min_sleep == 0)){
/*
* Activate the hpts if it is sleeping.
*/
hpts->p_direct_wake = 1;
tcp_wakehpts(hpts);
}
} else if ((hpts->p_hpts_active == 0) &&
(hpts->p_on_min_sleep == 0)){
INP_WLOCK_ASSERT(inp);
inp->inp_hpts_drop_reas = reason;
if (inp->inp_in_dropq != IHPTS_NONE)
return;
hpts = tcp_dropq_lock(tp->t_inpcb);
MPASS(hpts->p_cpu == inp->inp_dropq_cpu);
TAILQ_INSERT_TAIL(&hpts->p_dropq, inp, inp_dropq);
inp->inp_in_dropq = IHPTS_ONQUEUE;
inp->inp_dropq_gencnt = hpts->p_dropq_gencnt;
hpts->p_dropq_cnt++;
in_pcbref(inp);
if ((hpts->p_hpts_active == 0) && (hpts->p_on_min_sleep == 0)){
hpts->p_direct_wake = 1;
tcp_wakehpts(hpts);
}
inp->inp_hpts_drop_reas = reason;
mtx_unlock(&hpts->p_mtx);
HPTS_UNLOCK(hpts);
}
static uint16_t
@ -1136,8 +1122,8 @@ hpts_random_cpu(struct inpcb *inp){
* If one has been set use it i.e. we want both in and out on the
* same hpts.
*/
if (inp->inp_input_cpu_set) {
return (inp->inp_input_cpu);
if (inp->inp_dropq_cpu_set) {
return (inp->inp_dropq_cpu);
} else if (inp->inp_hpts_cpu_set) {
return (inp->inp_hpts_cpu);
}
@ -1160,8 +1146,8 @@ hpts_cpuid(struct inpcb *inp, int *failed)
* If one has been set use it i.e. we want both in and out on the
* same hpts.
*/
if (inp->inp_input_cpu_set) {
return (inp->inp_input_cpu);
if (inp->inp_dropq_cpu_set) {
return (inp->inp_dropq_cpu);
} else if (inp->inp_hpts_cpu_set) {
return (inp->inp_hpts_cpu);
}
@ -1249,117 +1235,50 @@ tcp_drop_in_pkts(struct tcpcb *tp)
* list.
*/
static void
tcp_input_data(struct tcp_hpts_entry *hpts, struct timeval *tv)
tcp_delayed_drop(struct tcp_hpts_entry *hpts)
{
TAILQ_HEAD(, inpcb) head = TAILQ_HEAD_INITIALIZER(head);
struct inpcb *inp, *tmp;
struct tcpcb *tp;
struct inpcb *inp;
uint16_t drop_reason;
int16_t set_cpu;
uint32_t did_prefetch = 0;
int dropped;
HPTS_MTX_ASSERT(hpts);
NET_EPOCH_ASSERT();
while ((inp = TAILQ_FIRST(&hpts->p_input)) != NULL) {
HPTS_MTX_ASSERT(hpts);
hpts_sane_input_remove(hpts, inp, 0);
if (inp->inp_input_cpu_set == 0) {
set_cpu = 1;
} else {
set_cpu = 0;
}
hpts->p_inp = inp;
drop_reason = inp->inp_hpts_drop_reas;
inp->inp_in_input = 0;
mtx_unlock(&hpts->p_mtx);
TAILQ_SWAP(&head, &hpts->p_dropq, inpcb, inp_dropq);
hpts->p_dropq_cnt = 0;
hpts->p_dropq_gencnt++;
HPTS_UNLOCK(hpts);
TAILQ_FOREACH_SAFE(inp, &head, inp_dropq, tmp) {
INP_WLOCK(inp);
#ifdef VIMAGE
CURVNET_SET(inp->inp_vnet);
#endif
MPASS(inp->inp_hpts_drop_reas != 0);
if (__predict_false(inp->inp_in_dropq == IHPTS_MOVING)) {
inp->inp_in_dropq = IHPTS_NONE;
if (in_pcbrele_wlocked(inp) == false)
INP_WUNLOCK(inp);
continue;
}
MPASS(inp->inp_in_dropq == IHPTS_ONQUEUE);
inp->inp_in_dropq = IHPTS_NONE;
if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED))) {
out:
hpts->p_inp = NULL;
if (in_pcbrele_wlocked(inp) == 0) {
if (in_pcbrele_wlocked(inp) == false)
INP_WUNLOCK(inp);
}
#ifdef VIMAGE
CURVNET_RESTORE();
#endif
mtx_lock(&hpts->p_mtx);
continue;
}
tp = intotcpcb(inp);
if ((tp == NULL) || (tp->t_inpcb == NULL)) {
goto out;
}
if (drop_reason) {
/* This tcb is being destroyed for drop_reason */
CURVNET_SET(inp->inp_vnet);
if (__predict_true((tp = intotcpcb(inp)) != NULL)) {
MPASS(tp->t_inpcb == inp);
tcp_drop_in_pkts(tp);
tp = tcp_drop(tp, drop_reason);
if (tp == NULL) {
tp = tcp_drop(tp, inp->inp_hpts_drop_reas);
if (tp == NULL)
INP_WLOCK(inp);
}
if (in_pcbrele_wlocked(inp) == 0)
INP_WUNLOCK(inp);
#ifdef VIMAGE
CURVNET_RESTORE();
#endif
mtx_lock(&hpts->p_mtx);
continue;
}
if (set_cpu) {
/*
* Setup so the next time we will move to the right
* CPU. This should be a rare event. It will
* sometimes happens when we are the client side
* (usually not the server). Somehow tcp_output()
* gets called before the tcp_do_segment() sets the
* intial state. This means the r_cpu and r_hpts_cpu
* is 0. We get on the hpts, and then tcp_input()
* gets called setting up the r_cpu to the correct
* value. The hpts goes off and sees the mis-match.
* We simply correct it here and the CPU will switch
* to the new hpts nextime the tcb gets added to the
* the hpts (not this time) :-)
*/
tcp_set_hpts(inp);
}
if (tp->t_fb_ptr != NULL) {
kern_prefetch(tp->t_fb_ptr, &did_prefetch);
did_prefetch = 1;
}
if ((tp->t_fb->tfb_do_queued_segments != NULL) && tp->t_in_pkt) {
if (inp->inp_in_input)
tcp_hpts_remove(inp, HPTS_REMOVE_INPUT);
dropped = (*tp->t_fb->tfb_do_queued_segments)(inp->inp_socket, tp, 0);
if (dropped) {
/* Re-acquire the wlock so we can release the reference */
INP_WLOCK(inp);
}
} else if (tp->t_in_pkt) {
/*
* We reach here only if we had a
* stack that supported INP_SUPPORTS_MBUFQ
* and then somehow switched to a stack that
* does not. The packets are basically stranded
* and would hang with the connection until
* cleanup without this code. Its not the
* best way but I know of no other way to
* handle it since the stack needs functions
* it does not have to handle queued packets.
*/
tcp_drop_in_pkts(tp);
}
if (in_pcbrele_wlocked(inp) == 0)
if (in_pcbrele_wlocked(inp) == false)
INP_WUNLOCK(inp);
INP_UNLOCK_ASSERT(inp);
#ifdef VIMAGE
CURVNET_RESTORE();
#endif
mtx_lock(&hpts->p_mtx);
hpts->p_inp = NULL;
}
mtx_lock(&hpts->p_mtx); /* XXXGL */
}
static void
@ -1489,10 +1408,10 @@ tcp_hptsi(struct tcp_hpts_entry *hpts, int from_callout)
hpts->p_nxt_slot = hpts->p_prev_slot;
hpts->p_runningslot = hpts_slot(hpts->p_prev_slot, 1);
}
KASSERT((((TAILQ_EMPTY(&hpts->p_input) != 0) && (hpts->p_on_inqueue_cnt == 0)) ||
((TAILQ_EMPTY(&hpts->p_input) == 0) && (hpts->p_on_inqueue_cnt > 0))),
KASSERT((((TAILQ_EMPTY(&hpts->p_dropq) != 0) && (hpts->p_dropq_cnt == 0)) ||
((TAILQ_EMPTY(&hpts->p_dropq) == 0) && (hpts->p_dropq_cnt > 0))),
("%s hpts:%p in_hpts cnt:%d and queue state mismatch",
__FUNCTION__, hpts, hpts->p_on_inqueue_cnt));
__FUNCTION__, hpts, hpts->p_dropq_cnt));
HPTS_MTX_ASSERT(hpts);
if (hpts->p_on_queue_cnt == 0) {
goto no_one;
@ -1716,10 +1635,10 @@ tcp_hptsi(struct tcp_hpts_entry *hpts, int from_callout)
* Check to see if we took an excess amount of time and need to run
* more ticks (if we did not hit eno-bufs).
*/
KASSERT((((TAILQ_EMPTY(&hpts->p_input) != 0) && (hpts->p_on_inqueue_cnt == 0)) ||
((TAILQ_EMPTY(&hpts->p_input) == 0) && (hpts->p_on_inqueue_cnt > 0))),
KASSERT((((TAILQ_EMPTY(&hpts->p_dropq) != 0) && (hpts->p_dropq_cnt == 0)) ||
((TAILQ_EMPTY(&hpts->p_dropq) == 0) && (hpts->p_dropq_cnt > 0))),
("%s hpts:%p in_hpts cnt:%d queue state mismatch",
__FUNCTION__, hpts, hpts->p_on_inqueue_cnt));
__FUNCTION__, hpts, hpts->p_dropq_cnt));
hpts->p_prev_slot = hpts->p_cur_slot;
hpts->p_lasttick = hpts->p_curtick;
if ((from_callout == 0) || (loop_cnt > max_pacer_loops)) {
@ -1765,31 +1684,30 @@ tcp_hptsi(struct tcp_hpts_entry *hpts, int from_callout)
* Run any input that may be there not covered
* in running data.
*/
if (!TAILQ_EMPTY(&hpts->p_input)) {
tcp_input_data(hpts, &tv);
/*
* Now did we spend too long running input and need to run more ticks?
* Note that if wrap_loop_cnt < 2 then we should have the conditions
* in the KASSERT's true. But if the wheel is behind i.e. wrap_loop_cnt
* is greater than 2, then the condtion most likely are *not* true. Also
* if we are called not from the callout, we don't run the wheel multiple
* times so the slots may not align either.
*/
KASSERT(((hpts->p_prev_slot == hpts->p_cur_slot) ||
(wrap_loop_cnt >= 2) || (from_callout == 0)),
("H:%p p_prev_slot:%u not equal to p_cur_slot:%u", hpts,
hpts->p_prev_slot, hpts->p_cur_slot));
KASSERT(((hpts->p_lasttick == hpts->p_curtick)
|| (wrap_loop_cnt >= 2) || (from_callout == 0)),
("H:%p p_lasttick:%u not equal to p_curtick:%u", hpts,
hpts->p_lasttick, hpts->p_curtick));
if (from_callout && (hpts->p_lasttick != hpts->p_curtick)) {
hpts->p_curtick = tcp_gethptstick(&tv);
counter_u64_add(hpts_loops, 1);
hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick);
goto again;
}
tcp_delayed_drop(hpts);
/*
* Now did we spend too long running input and need to run more ticks?
* Note that if wrap_loop_cnt < 2 then we should have the conditions
* in the KASSERT's true. But if the wheel is behind i.e. wrap_loop_cnt
* is 2 or greater, then the conditions most likely are *not* true.
* Also if we are called not from the callout, we don't run the wheel
* multiple times so the slots may not align either.
*/
KASSERT(((hpts->p_prev_slot == hpts->p_cur_slot) ||
(wrap_loop_cnt >= 2) || (from_callout == 0)),
("H:%p p_prev_slot:%u not equal to p_cur_slot:%u", hpts,
hpts->p_prev_slot, hpts->p_cur_slot));
KASSERT(((hpts->p_lasttick == hpts->p_curtick)
|| (wrap_loop_cnt >= 2) || (from_callout == 0)),
("H:%p p_lasttick:%u not equal to p_curtick:%u", hpts,
hpts->p_lasttick, hpts->p_curtick));
if (from_callout && (hpts->p_lasttick != hpts->p_curtick)) {
hpts->p_curtick = tcp_gethptstick(&tv);
counter_u64_add(hpts_loops, 1);
hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick);
goto again;
}
if (from_callout){
tcp_hpts_set_max_sleep(hpts, wrap_loop_cnt);
}
@ -1814,12 +1732,12 @@ __tcp_set_hpts(struct inpcb *inp, int32_t line)
inp->inp_hpts_cpu_set = 1;
}
mtx_unlock(&hpts->p_mtx);
hpts = tcp_input_lock(inp);
if ((inp->inp_input_cpu_set == 0) &&
(inp->inp_in_input == 0)) {
inp->inp_input_cpu = hpts_cpuid(inp, &failed);
hpts = tcp_dropq_lock(inp);
if ((inp->inp_dropq_cpu_set == 0) &&
(inp->inp_in_dropq == 0)) {
inp->inp_dropq_cpu = hpts_cpuid(inp, &failed);
if (failed == 0)
inp->inp_input_cpu_set = 1;
inp->inp_dropq_cpu_set = 1;
}
mtx_unlock(&hpts->p_mtx);
}
@ -2140,7 +2058,7 @@ tcp_init_hptsi(void *st)
*/
mtx_init(&hpts->p_mtx, "tcp_hpts_lck",
"hpts", MTX_DEF | MTX_DUPOK);
TAILQ_INIT(&hpts->p_input);
TAILQ_INIT(&hpts->p_dropq);
for (j = 0; j < NUM_OF_HPTSI_SLOTS; j++) {
TAILQ_INIT(&hpts->p_hptss[j]);
}
@ -2155,8 +2073,8 @@ tcp_init_hptsi(void *st)
SYSCTL_ADD_INT(&hpts->hpts_ctx,
SYSCTL_CHILDREN(hpts->hpts_root),
OID_AUTO, "in_qcnt", CTLFLAG_RD,
&hpts->p_on_inqueue_cnt, 0,
"Count TCB's awaiting input processing");
&hpts->p_dropq_cnt, 0,
"Count TCB's awaiting delayed drop");
SYSCTL_ADD_INT(&hpts->hpts_ctx,
SYSCTL_CHILDREN(hpts->hpts_root),
OID_AUTO, "out_qcnt", CTLFLAG_RD,

View File

@ -116,9 +116,9 @@ struct hpts_diag {
#ifdef _KERNEL
#define tcp_hpts_remove(a, b) __tcp_hpts_remove(a, b, __LINE__)
void __tcp_hpts_remove(struct inpcb *inp, int32_t flags, int32_t line);
#define HPTS_REMOVE_INPUT 0x01
#define HPTS_REMOVE_DROPQ 0x01
#define HPTS_REMOVE_OUTPUT 0x02
#define HPTS_REMOVE_ALL (HPTS_REMOVE_INPUT | HPTS_REMOVE_OUTPUT)
#define HPTS_REMOVE_ALL (HPTS_REMOVE_DROPQ | HPTS_REMOVE_OUTPUT)
static inline bool
tcp_in_hpts(struct inpcb *inp)
@ -160,8 +160,7 @@ tcp_hpts_insert_diag(struct inpcb *inp, uint32_t slot, int32_t line, struct hpts
void __tcp_set_hpts(struct inpcb *inp, int32_t line);
#define tcp_set_hpts(a) __tcp_set_hpts(a, __LINE__)
void __tcp_set_inp_to_drop(struct inpcb *inp, uint16_t reason, int32_t line);
#define tcp_set_inp_to_drop(a, b) __tcp_set_inp_to_drop(a, b, __LINE__)
void tcp_set_inp_to_drop(struct inpcb *inp, uint16_t reason);
void tcp_run_hpts(void);

View File

@ -1354,7 +1354,7 @@ tcp_lro_flush_tcphpts(struct lro_ctrl *lc, struct lro_entry *le)
if (le->m_head != NULL) {
counter_u64_add(tcp_inp_lro_direct_queue, 1);
tcp_lro_log(tp, lc, le, NULL, 22, 1,
inp->inp_flags2, inp->inp_in_input, 1);
inp->inp_flags2, inp->inp_in_dropq, 1);
tcp_queue_pkts(inp, tp, le);
}
if (should_wake) {

View File

@ -1884,7 +1884,7 @@ bbr_fill_in_logging_data(struct tcp_bbr *bbr, struct tcp_log_bbr *l, uint32_t ct
l->pacing_gain = bbr->r_ctl.rc_bbr_hptsi_gain;
l->cwnd_gain = bbr->r_ctl.rc_bbr_cwnd_gain;
l->inhpts = tcp_in_hpts(bbr->rc_inp);
l->ininput = bbr->rc_inp->inp_in_input;
l->ininput = bbr->rc_inp->inp_in_dropq;
l->use_lt_bw = bbr->rc_lt_use_bw;
l->pkts_out = bbr->r_ctl.rc_flight_at_input;
l->pkt_epoch = bbr->r_ctl.rc_pkt_epoch;

View File

@ -2295,7 +2295,7 @@ rack_log_retran_reason(struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t
log.u_bbr.flex6 = rsm->r_end;
log.u_bbr.flex8 = mod;
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
log.u_bbr.ininput = rack->rc_inp->inp_in_input;
log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
log.u_bbr.timeStamp = tcp_get_usecs(&tv);
log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto;
@ -2330,7 +2330,7 @@ rack_log_to_start(struct tcp_rack *rack, uint32_t cts, uint32_t to, int32_t slot
else
log.u_bbr.pkts_out = rack->r_ctl.rc_prr_sndcnt;
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
log.u_bbr.ininput = rack->rc_inp->inp_in_input;
log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
log.u_bbr.timeStamp = tcp_get_usecs(&tv);
log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto;
@ -2355,7 +2355,7 @@ rack_log_to_event(struct tcp_rack *rack, int32_t to_num, struct rack_sendmap *rs
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
log.u_bbr.ininput = rack->rc_inp->inp_in_input;
log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
log.u_bbr.flex8 = to_num;
log.u_bbr.flex1 = rack->r_ctl.rc_rack_min_rtt;
log.u_bbr.flex2 = rack->rc_rack_rtt;
@ -2394,7 +2394,7 @@ rack_log_map_chg(struct tcpcb *tp, struct tcp_rack *rack,
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.flex8 = flag;
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
log.u_bbr.ininput = rack->rc_inp->inp_in_input;
log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
log.u_bbr.cur_del_rate = (uint64_t)prev;
log.u_bbr.delRate = (uint64_t)rsm;
log.u_bbr.rttProp = (uint64_t)next;
@ -2439,7 +2439,7 @@ rack_log_rtt_upd(struct tcpcb *tp, struct tcp_rack *rack, uint32_t t, uint32_t l
struct timeval tv;
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
log.u_bbr.ininput = rack->rc_inp->inp_in_input;
log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
log.u_bbr.flex1 = t;
log.u_bbr.flex2 = len;
log.u_bbr.flex3 = rack->r_ctl.rc_rack_min_rtt;
@ -2589,7 +2589,7 @@ rack_log_progress_event(struct tcp_rack *rack, struct tcpcb *tp, uint32_t tick,
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
log.u_bbr.ininput = rack->rc_inp->inp_in_input;
log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
log.u_bbr.flex1 = line;
log.u_bbr.flex2 = tick;
log.u_bbr.flex3 = tp->t_maxunacktime;
@ -2616,7 +2616,7 @@ rack_log_type_bbrsnd(struct tcp_rack *rack, uint32_t len, uint32_t slot, uint32_
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
log.u_bbr.ininput = rack->rc_inp->inp_in_input;
log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
log.u_bbr.flex1 = slot;
if (rack->rack_no_prr)
log.u_bbr.flex2 = 0;
@ -2718,7 +2718,7 @@ rack_log_type_just_return(struct tcp_rack *rack, uint32_t cts, uint32_t tlen, ui
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
log.u_bbr.ininput = rack->rc_inp->inp_in_input;
log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
log.u_bbr.flex1 = slot;
log.u_bbr.flex2 = rack->r_ctl.rc_hpts_flags;
log.u_bbr.flex4 = reason;
@ -2751,7 +2751,7 @@ rack_log_to_cancel(struct tcp_rack *rack, int32_t hpts_removed, int line, uint32
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
log.u_bbr.ininput = rack->rc_inp->inp_in_input;
log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
log.u_bbr.flex1 = line;
log.u_bbr.flex2 = rack->r_ctl.rc_last_output_to;
log.u_bbr.flex3 = flags_on_entry;
@ -13329,7 +13329,7 @@ rack_log_input_packet(struct tcpcb *tp, struct tcp_rack *rack, struct tcp_ackent
#endif
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
log.u_bbr.ininput = rack->rc_inp->inp_in_input;
log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
if (rack->rack_no_prr == 0)
log.u_bbr.flex1 = rack->r_ctl.rc_prr_sndcnt;
else
@ -14321,7 +14321,7 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
#endif
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
log.u_bbr.ininput = rack->rc_inp->inp_in_input;
log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
if (rack->rack_no_prr == 0)
log.u_bbr.flex1 = rack->r_ctl.rc_prr_sndcnt;
else
@ -15612,7 +15612,7 @@ rack_log_fsb(struct tcp_rack *rack, struct tcpcb *tp, struct socket *so, uint32_
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
log.u_bbr.ininput = rack->rc_inp->inp_in_input;
log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
log.u_bbr.flex1 = error;
log.u_bbr.flex2 = flags;
log.u_bbr.flex3 = rsm_is_null;
@ -16128,7 +16128,7 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
log.u_bbr.ininput = rack->rc_inp->inp_in_input;
log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
if (rack->rack_no_prr)
log.u_bbr.flex1 = 0;
else
@ -16629,7 +16629,7 @@ rack_fast_output(struct tcpcb *tp, struct tcp_rack *rack, uint64_t ts_val,
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
log.u_bbr.ininput = rack->rc_inp->inp_in_input;
log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
if (rack->rack_no_prr)
log.u_bbr.flex1 = 0;
else
@ -18801,7 +18801,7 @@ rack_output(struct tcpcb *tp)
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
log.u_bbr.ininput = rack->rc_inp->inp_in_input;
log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
if (rack->rack_no_prr)
log.u_bbr.flex1 = 0;
else

View File

@ -2096,7 +2096,7 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.inhpts = tp->t_inpcb->inp_in_hpts;
log.u_bbr.ininput = tp->t_inpcb->inp_in_input;
log.u_bbr.ininput = tp->t_inpcb->inp_in_dropq;
log.u_bbr.flex8 = 4;
log.u_bbr.pkts_out = tp->t_maxseg;
log.u_bbr.timeStamp = tcp_get_usecs(&tv);