This adds the third step in getting BBR into the tree. BBR and
an updated RACK depend on having access to the new ratelimit API added in this commit. Sponsored by: Netflix Inc. Differential Revision: https://reviews.freebsd.org/D20953
This commit is contained in:
parent
1214ed36dc
commit
8a34b17735
@ -4276,6 +4276,7 @@ netinet/tcp_lro.c optional inet | inet6
|
||||
netinet/tcp_output.c optional inet | inet6
|
||||
netinet/tcp_offload.c optional tcp_offload inet | tcp_offload inet6
|
||||
netinet/tcp_hpts.c optional tcphpts inet | tcphpts inet6
|
||||
netinet/tcp_ratelimit.c optional ratelimit inet | ratelimit inet6
|
||||
netinet/tcp_pcap.c optional inet tcppcap | inet6 tcppcap \
|
||||
compile-with "${NORMAL_C} ${NO_WNONNULL}"
|
||||
netinet/tcp_reass.c optional inet | inet6
|
||||
|
@ -1247,6 +1247,7 @@ int cxgbe_snd_tag_modify(struct m_snd_tag *, union if_snd_tag_modify_params *);
|
||||
int cxgbe_snd_tag_query(struct m_snd_tag *, union if_snd_tag_query_params *);
|
||||
void cxgbe_snd_tag_free(struct m_snd_tag *);
|
||||
void cxgbe_snd_tag_free_locked(struct cxgbe_snd_tag *);
|
||||
void cxgbe_ratelimit_query(struct ifnet *, struct if_ratelimit_query_results *);
|
||||
#endif
|
||||
|
||||
/* t4_filter.c */
|
||||
|
@ -1658,6 +1658,7 @@ cxgbe_vi_attach(device_t dev, struct vi_info *vi)
|
||||
ifp->if_snd_tag_modify = cxgbe_snd_tag_modify;
|
||||
ifp->if_snd_tag_query = cxgbe_snd_tag_query;
|
||||
ifp->if_snd_tag_free = cxgbe_snd_tag_free;
|
||||
ifp->if_ratelimit_query = cxgbe_ratelimit_query;
|
||||
#endif
|
||||
|
||||
ifp->if_capabilities = T4_CAP;
|
||||
|
@ -903,4 +903,35 @@ cxgbe_snd_tag_free(struct m_snd_tag *mst)
|
||||
}
|
||||
mtx_unlock(&cst->lock);
|
||||
}
|
||||
|
||||
#define CXGBE_MAX_FLOWS 4000 /* Testing show so far thats all this adapter can do */
|
||||
#define CXGBE_UNIQUE_RATE_COUNT 16 /* Number of unique rates that can be setup */
|
||||
|
||||
void
|
||||
cxgbe_ratelimit_query(struct ifnet *ifp __unused,
|
||||
struct if_ratelimit_query_results *q)
|
||||
{
|
||||
/*
|
||||
* This is a skeleton and needs future work
|
||||
* by the driver supporters. It should be
|
||||
* enhanced to look at the specific type of
|
||||
* interface and select approprate values
|
||||
* for these settings. This example goes
|
||||
* with an earlier card (t5), it has a maximum
|
||||
* number of 16 rates that the first guys in
|
||||
* select (thus the flags value RT_IS_SELECTABLE).
|
||||
* If it was a fixed table then we would setup a
|
||||
* const array (example mlx5). Note the card tested
|
||||
* can only support reasonably 4000 flows before
|
||||
* the adapter has issues with sending so here
|
||||
* we limit the number of flows using hardware
|
||||
* pacing to that number, other cards may
|
||||
* be able to raise or eliminate this limit.
|
||||
*/
|
||||
q->rate_table = NULL;
|
||||
q->flags = RT_IS_SELECTABLE;
|
||||
q->max_flows = CXGBE_MAX_FLOWS;
|
||||
q->number_of_rates = CXGBE_UNIQUE_RATE_COUNT;
|
||||
q->min_segment_burst = 4; /* Driver emits 4 in a burst */
|
||||
}
|
||||
#endif
|
||||
|
@ -4070,6 +4070,48 @@ mlx5e_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params
|
||||
}
|
||||
}
|
||||
|
||||
/* Number of entries in adapter_rates_mlx[]. */
#define NUM_HDWR_RATES_MLX 13

/*
 * Pre-set hardware rates, in ascending order.  The figure in each
 * trailing comment is exactly the entry multiplied by eight
 * (presumably the entry is in bytes/s and the comment in bits/s;
 * confirm with the driver maintainer).
 */
static const uint64_t adapter_rates_mlx[NUM_HDWR_RATES_MLX] = {
	135375,		/* x8 =  1,083,000 */
	180500,		/* x8 =  1,444,000 */
	270750,		/* x8 =  2,166,000 */
	361000,		/* x8 =  2,888,000 */
	541500,		/* x8 =  4,332,000 */
	721875,		/* x8 =  5,775,000 */
	1082875,	/* x8 =  8,663,000 */
	1443875,	/* x8 = 11,551,000 */
	2165750,	/* x8 = 17,326,000 */
	2887750,	/* x8 = 23,102,000 */
	4331625,	/* x8 = 34,653,000 */
	5775500,	/* x8 = 46,204,000 */
	8663125		/* x8 = 69,305,000 */
};
|
||||
|
||||
static void
|
||||
mlx5e_ratelimit_query(struct ifnet *ifp __unused, struct if_ratelimit_query_results *q)
|
||||
{
|
||||
/*
|
||||
* This function needs updating by the driver maintainer!
|
||||
* For the MLX card there are currently (ConectX-4?) 13
|
||||
* pre-set rates and others i.e. ConnectX-5, 6, 7??
|
||||
*
|
||||
* This will change based on later adapters
|
||||
* and this code should be updated to look at ifp
|
||||
* and figure out the specific adapter type
|
||||
* settings i.e. how many rates as well
|
||||
* as if they are fixed (as is shown here) or
|
||||
* if they are dynamic (example chelsio t4). Also if there
|
||||
* is a maximum number of flows that the adapter
|
||||
* can handle that too needs to be updated in
|
||||
* the max_flows field.
|
||||
*/
|
||||
q->rate_table = adapter_rates_mlx;
|
||||
q->flags = RT_IS_FIXED_TABLE;
|
||||
q->max_flows = 0; /* mlx has no limit */
|
||||
q->number_of_rates = NUM_HDWR_RATES_MLX;
|
||||
q->min_segment_burst = 1;
|
||||
}
|
||||
|
||||
static void
|
||||
mlx5e_snd_tag_free(struct m_snd_tag *pmt)
|
||||
{
|
||||
@ -4155,7 +4197,9 @@ mlx5e_create_ifp(struct mlx5_core_dev *mdev)
|
||||
ifp->if_snd_tag_free = mlx5e_snd_tag_free;
|
||||
ifp->if_snd_tag_modify = mlx5e_snd_tag_modify;
|
||||
ifp->if_snd_tag_query = mlx5e_snd_tag_query;
|
||||
|
||||
#ifdef RATELIMIT
|
||||
ifp->if_ratelimit_query = mlx5e_ratelimit_query;
|
||||
#endif
|
||||
/* set TSO limits so that we don't have to drop TX packets */
|
||||
ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
|
||||
ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */;
|
||||
|
@ -126,6 +126,23 @@ ifdead_snd_tag_free(struct m_snd_tag *pmt)
|
||||
{
|
||||
}
|
||||
|
||||
static void
|
||||
ifdead_ratelimit_query(struct ifnet *ifp __unused,
|
||||
struct if_ratelimit_query_results *q)
|
||||
{
|
||||
/*
|
||||
* This guy does not support
|
||||
* this interface. Not sure
|
||||
* why we would specify a
|
||||
* flag on the interface
|
||||
* that says we do.
|
||||
*/
|
||||
q->rate_table = NULL;
|
||||
q->flags = RT_NOSUPPORT;
|
||||
q->max_flows = 0;
|
||||
q->number_of_rates = 0;
|
||||
}
|
||||
|
||||
void
|
||||
if_dead(struct ifnet *ifp)
|
||||
{
|
||||
@ -142,4 +159,5 @@ if_dead(struct ifnet *ifp)
|
||||
ifp->if_snd_tag_modify = ifdead_snd_tag_modify;
|
||||
ifp->if_snd_tag_query = ifdead_snd_tag_query;
|
||||
ifp->if_snd_tag_free = ifdead_snd_tag_free;
|
||||
ifp->if_ratelimit_query = ifdead_ratelimit_query;
|
||||
}
|
||||
|
@ -144,6 +144,8 @@ static int lagg_snd_tag_modify(struct m_snd_tag *,
|
||||
static int lagg_snd_tag_query(struct m_snd_tag *,
|
||||
union if_snd_tag_query_params *);
|
||||
static void lagg_snd_tag_free(struct m_snd_tag *);
|
||||
static void lagg_ratelimit_query(struct ifnet *,
|
||||
struct if_ratelimit_query_results *);
|
||||
#endif
|
||||
static int lagg_setmulti(struct lagg_port *);
|
||||
static int lagg_clrmulti(struct lagg_port *);
|
||||
@ -537,6 +539,7 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
|
||||
ifp->if_snd_tag_modify = lagg_snd_tag_modify;
|
||||
ifp->if_snd_tag_query = lagg_snd_tag_query;
|
||||
ifp->if_snd_tag_free = lagg_snd_tag_free;
|
||||
ifp->if_ratelimit_query = lagg_ratelimit_query;
|
||||
#endif
|
||||
ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
|
||||
|
||||
@ -1670,6 +1673,20 @@ lagg_snd_tag_free(struct m_snd_tag *mst)
|
||||
free(lst, M_LAGG);
|
||||
}
|
||||
|
||||
static void
|
||||
lagg_ratelimit_query(struct ifnet *ifp __unused, struct if_ratelimit_query_results *q)
|
||||
{
|
||||
/*
|
||||
* For lagg, we have an indirect
|
||||
* interface. The caller needs to
|
||||
* get a ratelimit tag on the actual
|
||||
* interface the flow will go on.
|
||||
*/
|
||||
q->rate_table = NULL;
|
||||
q->flags = RT_IS_INDIRECT;
|
||||
q->max_flows = 0;
|
||||
q->number_of_rates = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int
|
||||
|
@ -203,6 +203,8 @@ struct if_snd_tag_alloc_header {
|
||||
struct if_snd_tag_alloc_rate_limit {
|
||||
struct if_snd_tag_alloc_header hdr;
|
||||
uint64_t max_rate; /* in bytes/s */
|
||||
uint32_t flags; /* M_NOWAIT or M_WAITOK */
|
||||
uint32_t reserved; /* alignment */
|
||||
};
|
||||
|
||||
struct if_snd_tag_rate_limit_params {
|
||||
@ -210,7 +212,7 @@ struct if_snd_tag_rate_limit_params {
|
||||
uint32_t queue_level; /* 0 (empty) .. 65535 (full) */
|
||||
#define IF_SND_QUEUE_LEVEL_MIN 0
|
||||
#define IF_SND_QUEUE_LEVEL_MAX 65535
|
||||
uint32_t reserved; /* padding */
|
||||
uint32_t flags; /* M_NOWAIT or M_WAITOK */
|
||||
};
|
||||
|
||||
union if_snd_tag_alloc_params {
|
||||
@ -229,11 +231,37 @@ union if_snd_tag_query_params {
|
||||
struct if_snd_tag_rate_limit_params unlimited;
|
||||
};
|
||||
|
||||
/*
 * Flags returned in if_ratelimit_query_results.flags.
 */
/* Rate limiting is not supported. */
#define RT_NOSUPPORT 0x00000000
/*
 * The interface is an indirect one, like a lagg; select the actual
 * interface to learn the capabilities.
 */
#define RT_IS_INDIRECT 0x00000001
/*
 * There is no rate table; the caller selects rates and the first
 * number_of_rates of them are created.
 */
#define RT_IS_SELECTABLE 0x00000002
/* A fixed table is attached (see rate_table). */
#define RT_IS_FIXED_TABLE 0x00000004
/* It is not usable for this. */
#define RT_IS_UNUSABLE 0x00000008

/*
 * Result of an if_ratelimit_query call, filled in by the driver.
 */
struct if_ratelimit_query_results {
	/* Pointer to the rate table, if one is present. */
	const uint64_t *rate_table;
	/* RT_* flags describing the result. */
	uint32_t flags;
	/* Maximum number of flows that may use pacing; 0 means unlimited. */
	uint32_t max_flows;
	/* How many unique rates can be created. */
	uint32_t number_of_rates;
	/* The amount the adapter bursts at each send. */
	uint32_t min_segment_burst;
};
|
||||
|
||||
typedef int (if_snd_tag_alloc_t)(struct ifnet *, union if_snd_tag_alloc_params *,
|
||||
struct m_snd_tag **);
|
||||
typedef int (if_snd_tag_modify_t)(struct m_snd_tag *, union if_snd_tag_modify_params *);
|
||||
typedef int (if_snd_tag_query_t)(struct m_snd_tag *, union if_snd_tag_query_params *);
|
||||
typedef void (if_snd_tag_free_t)(struct m_snd_tag *);
|
||||
typedef void (if_ratelimit_query_t)(struct ifnet *,
|
||||
struct if_ratelimit_query_results *);
|
||||
|
||||
|
||||
/*
|
||||
* Structure defining a network interface.
|
||||
@ -374,6 +402,7 @@ struct ifnet {
|
||||
if_snd_tag_modify_t *if_snd_tag_modify;
|
||||
if_snd_tag_query_t *if_snd_tag_query;
|
||||
if_snd_tag_free_t *if_snd_tag_free;
|
||||
if_ratelimit_query_t *if_ratelimit_query;
|
||||
|
||||
/* Ethernet PCP */
|
||||
uint8_t if_pcp;
|
||||
|
@ -210,6 +210,22 @@ SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomtime,
|
||||
&VNET_NAME(ipport_randomtime), 0,
|
||||
"Minimum time to keep sequental port "
|
||||
"allocation before switching to a random one");
|
||||
|
||||
#ifdef RATELIMIT
/*
 * Rate-limit statistics counters.  Each one is exported read-only
 * through the "rl" sysctl node declared under _net_inet_ip below.
 */
counter_u64_t rate_limit_active;
counter_u64_t rate_limit_alloc_fail;
counter_u64_t rate_limit_set_ok;

static SYSCTL_NODE(_net_inet_ip, OID_AUTO, rl, CTLFLAG_RD, 0,
    "IP Rate Limiting");

SYSCTL_COUNTER_U64(_net_inet_ip_rl, OID_AUTO, active, CTLFLAG_RD,
    &rate_limit_active,
    "Active rate limited connections");
SYSCTL_COUNTER_U64(_net_inet_ip_rl, OID_AUTO, alloc_fail, CTLFLAG_RD,
    &rate_limit_alloc_fail,
    "Rate limited connection failures");
SYSCTL_COUNTER_U64(_net_inet_ip_rl, OID_AUTO, set_ok, CTLFLAG_RD,
    &rate_limit_set_ok,
    "Rate limited setting succeeded");
#endif /* RATELIMIT */
|
||||
|
||||
#endif /* INET */
|
||||
|
||||
/*
|
||||
@ -3170,6 +3186,7 @@ in_pcbmodify_txrtlmt(struct inpcb *inp, uint32_t max_pacing_rate)
|
||||
{
|
||||
union if_snd_tag_modify_params params = {
|
||||
.rate_limit.max_rate = max_pacing_rate,
|
||||
.rate_limit.flags = M_NOWAIT,
|
||||
};
|
||||
struct m_snd_tag *mst;
|
||||
struct ifnet *ifp;
|
||||
@ -3256,7 +3273,8 @@ in_pcbquery_txrlevel(struct inpcb *inp, uint32_t *p_txqueue_level)
|
||||
*/
|
||||
int
|
||||
in_pcbattach_txrtlmt(struct inpcb *inp, struct ifnet *ifp,
|
||||
uint32_t flowtype, uint32_t flowid, uint32_t max_pacing_rate)
|
||||
uint32_t flowtype, uint32_t flowid, uint32_t max_pacing_rate, struct m_snd_tag **st)
|
||||
|
||||
{
|
||||
union if_snd_tag_alloc_params params = {
|
||||
.rate_limit.hdr.type = (max_pacing_rate == -1U) ?
|
||||
@ -3264,22 +3282,47 @@ in_pcbattach_txrtlmt(struct inpcb *inp, struct ifnet *ifp,
|
||||
.rate_limit.hdr.flowid = flowid,
|
||||
.rate_limit.hdr.flowtype = flowtype,
|
||||
.rate_limit.max_rate = max_pacing_rate,
|
||||
.rate_limit.flags = M_NOWAIT,
|
||||
};
|
||||
int error;
|
||||
|
||||
INP_WLOCK_ASSERT(inp);
|
||||
|
||||
if (inp->inp_snd_tag != NULL)
|
||||
if (*st != NULL)
|
||||
return (EINVAL);
|
||||
|
||||
if (ifp->if_snd_tag_alloc == NULL) {
|
||||
error = EOPNOTSUPP;
|
||||
} else {
|
||||
error = ifp->if_snd_tag_alloc(ifp, &params, &inp->inp_snd_tag);
|
||||
|
||||
if (error == 0) {
|
||||
counter_u64_add(rate_limit_set_ok, 1);
|
||||
counter_u64_add(rate_limit_active, 1);
|
||||
} else
|
||||
counter_u64_add(rate_limit_alloc_fail, 1);
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
void
|
||||
in_pcbdetach_tag(struct ifnet *ifp, struct m_snd_tag *mst)
|
||||
{
|
||||
if (ifp == NULL)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If the device was detached while we still had reference(s)
|
||||
* on the ifp, we assume if_snd_tag_free() was replaced with
|
||||
* stubs.
|
||||
*/
|
||||
ifp->if_snd_tag_free(mst);
|
||||
|
||||
/* release reference count on network interface */
|
||||
if_rele(ifp);
|
||||
counter_u64_add(rate_limit_active, -1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Free an existing TX rate limit tag based on the "inp->inp_snd_tag",
|
||||
* if any:
|
||||
@ -3300,6 +3343,56 @@ in_pcbdetach_txrtlmt(struct inpcb *inp)
|
||||
m_snd_tag_rele(mst);
|
||||
}
|
||||
|
||||
int
|
||||
in_pcboutput_txrtlmt_locked(struct inpcb *inp, struct ifnet *ifp, struct mbuf *mb, uint32_t max_pacing_rate)
|
||||
{
|
||||
int error;
|
||||
|
||||
/*
|
||||
* If the existing send tag is for the wrong interface due to
|
||||
* a route change, first drop the existing tag. Set the
|
||||
* CHANGED flag so that we will keep trying to allocate a new
|
||||
* tag if we fail to allocate one this time.
|
||||
*/
|
||||
if (inp->inp_snd_tag != NULL && inp->inp_snd_tag->ifp != ifp) {
|
||||
in_pcbdetach_txrtlmt(inp);
|
||||
inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED;
|
||||
}
|
||||
|
||||
/*
|
||||
* NOTE: When attaching to a network interface a reference is
|
||||
* made to ensure the network interface doesn't go away until
|
||||
* all ratelimit connections are gone. The network interface
|
||||
* pointers compared below represent valid network interfaces,
|
||||
* except when comparing towards NULL.
|
||||
*/
|
||||
if (max_pacing_rate == 0 && inp->inp_snd_tag == NULL) {
|
||||
error = 0;
|
||||
} else if (!(ifp->if_capenable & IFCAP_TXRTLMT)) {
|
||||
if (inp->inp_snd_tag != NULL)
|
||||
in_pcbdetach_txrtlmt(inp);
|
||||
error = 0;
|
||||
} else if (inp->inp_snd_tag == NULL) {
|
||||
/*
|
||||
* In order to utilize packet pacing with RSS, we need
|
||||
* to wait until there is a valid RSS hash before we
|
||||
* can proceed:
|
||||
*/
|
||||
if (M_HASHTYPE_GET(mb) == M_HASHTYPE_NONE) {
|
||||
error = EAGAIN;
|
||||
} else {
|
||||
error = in_pcbattach_txrtlmt(inp, ifp, M_HASHTYPE_GET(mb),
|
||||
mb->m_pkthdr.flowid, max_pacing_rate, &inp->inp_snd_tag);
|
||||
}
|
||||
} else {
|
||||
error = in_pcbmodify_txrtlmt(inp, max_pacing_rate);
|
||||
}
|
||||
if (error == 0 || error == EOPNOTSUPP)
|
||||
inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED;
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function should be called when the INP_RATE_LIMIT_CHANGED flag
|
||||
* is set in the fast path and will attach/detach/modify the TX rate
|
||||
@ -3342,47 +3435,8 @@ in_pcboutput_txrtlmt(struct inpcb *inp, struct ifnet *ifp, struct mbuf *mb)
|
||||
*/
|
||||
max_pacing_rate = socket->so_max_pacing_rate;
|
||||
|
||||
/*
|
||||
* If the existing send tag is for the wrong interface due to
|
||||
* a route change, first drop the existing tag. Set the
|
||||
* CHANGED flag so that we will keep trying to allocate a new
|
||||
* tag if we fail to allocate one this time.
|
||||
*/
|
||||
if (inp->inp_snd_tag != NULL && inp->inp_snd_tag->ifp != ifp) {
|
||||
in_pcbdetach_txrtlmt(inp);
|
||||
inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED;
|
||||
}
|
||||
error = in_pcboutput_txrtlmt_locked(inp, ifp, mb, max_pacing_rate);
|
||||
|
||||
/*
|
||||
* NOTE: When attaching to a network interface a reference is
|
||||
* made to ensure the network interface doesn't go away until
|
||||
* all ratelimit connections are gone. The network interface
|
||||
* pointers compared below represent valid network interfaces,
|
||||
* except when comparing towards NULL.
|
||||
*/
|
||||
if (max_pacing_rate == 0 && inp->inp_snd_tag == NULL) {
|
||||
error = 0;
|
||||
} else if (!(ifp->if_capenable & IFCAP_TXRTLMT)) {
|
||||
if (inp->inp_snd_tag != NULL)
|
||||
in_pcbdetach_txrtlmt(inp);
|
||||
error = 0;
|
||||
} else if (inp->inp_snd_tag == NULL) {
|
||||
/*
|
||||
* In order to utilize packet pacing with RSS, we need
|
||||
* to wait until there is a valid RSS hash before we
|
||||
* can proceed:
|
||||
*/
|
||||
if (M_HASHTYPE_GET(mb) == M_HASHTYPE_NONE) {
|
||||
error = EAGAIN;
|
||||
} else {
|
||||
error = in_pcbattach_txrtlmt(inp, ifp, M_HASHTYPE_GET(mb),
|
||||
mb->m_pkthdr.flowid, max_pacing_rate);
|
||||
}
|
||||
} else {
|
||||
error = in_pcbmodify_txrtlmt(inp, max_pacing_rate);
|
||||
}
|
||||
if (error == 0 || error == EOPNOTSUPP)
|
||||
inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED;
|
||||
if (did_upgrade)
|
||||
INP_DOWNGRADE(inp);
|
||||
}
|
||||
@ -3424,4 +3478,14 @@ in_pcboutput_eagain(struct inpcb *inp)
|
||||
if (did_upgrade)
|
||||
INP_DOWNGRADE(inp);
|
||||
}
|
||||
|
||||
static void
|
||||
rl_init(void *st)
|
||||
{
|
||||
rate_limit_active = counter_u64_alloc(M_WAITOK);
|
||||
rate_limit_alloc_fail = counter_u64_alloc(M_WAITOK);
|
||||
rate_limit_set_ok = counter_u64_alloc(M_WAITOK);
|
||||
}
|
||||
|
||||
SYSINIT(rl, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, rl_init, NULL);
|
||||
#endif /* RATELIMIT */
|
||||
|
@ -883,8 +883,13 @@ struct sockaddr *
|
||||
in_sockaddr(in_port_t port, struct in_addr *addr);
|
||||
void in_pcbsosetlabel(struct socket *so);
|
||||
#ifdef RATELIMIT
|
||||
int in_pcbattach_txrtlmt(struct inpcb *, struct ifnet *, uint32_t, uint32_t, uint32_t);
|
||||
int
|
||||
in_pcboutput_txrtlmt_locked(struct inpcb *, struct ifnet *,
|
||||
struct mbuf *, uint32_t);
|
||||
int in_pcbattach_txrtlmt(struct inpcb *, struct ifnet *, uint32_t, uint32_t,
|
||||
uint32_t, struct m_snd_tag **);
|
||||
void in_pcbdetach_txrtlmt(struct inpcb *);
|
||||
void in_pcbdetach_tag(struct ifnet *ifp, struct m_snd_tag *mst);
|
||||
int in_pcbmodify_txrtlmt(struct inpcb *, uint32_t);
|
||||
int in_pcbquery_txrtlmt(struct inpcb *, uint32_t *);
|
||||
int in_pcbquery_txrlevel(struct inpcb *, uint32_t *);
|
||||
|
1234
sys/netinet/tcp_ratelimit.c
Normal file
1234
sys/netinet/tcp_ratelimit.c
Normal file
File diff suppressed because it is too large
Load Diff
141
sys/netinet/tcp_ratelimit.h
Normal file
141
sys/netinet/tcp_ratelimit.h
Normal file
@ -0,0 +1,141 @@
|
||||
/*-
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*
|
||||
* Copyright (c) 2018-2019
|
||||
* Netflix Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
* __FBSDID("$FreeBSD$");
|
||||
*
|
||||
*/
|
||||
/**
|
||||
* Author: Randall Stewart <rrs@netflix.com>
|
||||
*/
|
||||
#ifndef __tcp_ratelimit_h__
|
||||
#define __tcp_ratelimit_h__
|
||||
|
||||
struct m_snd_tag;
|
||||
|
||||
/*
 * Flags kept in tcp_hwrate_limit_table.flags, one set per rate.
 */
#define HDWRPACE_INITED 0x0001
#define HDWRPACE_TAGPRESENT 0x0002
#define HDWRPACE_IFPDEPARTED 0x0004

/*
 * One hardware pacing rate.
 */
struct tcp_hwrate_limit_table {
	/* Pointer to the parent table. */
	const struct tcp_rate_set *ptbl;
	/* Send tag, if one is needed (chelsio). */
	struct m_snd_tag *tag;
	/* Rate we get, in bytes per second (Bps). */
	uint64_t rate;
	/* Time gap between packets at this rate. */
	uint32_t time_between;
	/* HDWRPACE_* flags. */
	uint32_t flags;
};
|
||||
|
||||
/* Rateset flags */
|
||||
#define RS_IS_DEFF 0x0001 /* Its a lagg, do a double lookup */
|
||||
#define RS_IS_INTF 0x0002 /* Its a plain interface */
|
||||
#define RS_NO_PRE 0x0004 /* The interfacd has set rates */
|
||||
#define RS_INT_TBL 0x0010 /*
|
||||
* The table is the internal version
|
||||
* which has special setup requirements.
|
||||
*/
|
||||
#define RS_IS_DEAD 0x0020 /* The RS is dead list */
|
||||
#define RS_FUNERAL_SCHD 0x0040 /* Is a epoch call scheduled to bury this guy?*/
|
||||
#define RS_INTF_NO_SUP 0x0100 /* The interface does not support the ratelimiting */
|
||||
|
||||
struct tcp_rate_set {
|
||||
struct sysctl_ctx_list sysctl_ctx;
|
||||
CK_LIST_ENTRY(tcp_rate_set) next;
|
||||
struct ifnet *rs_ifp;
|
||||
struct tcp_hwrate_limit_table *rs_rlt;
|
||||
uint64_t rs_flows_using;
|
||||
uint64_t rs_flow_limit;
|
||||
uint32_t rs_if_dunit;
|
||||
int rs_rate_cnt;
|
||||
int rs_min_seg;
|
||||
int rs_highest_valid;
|
||||
int rs_lowest_valid;
|
||||
int rs_disable;
|
||||
int rs_flags;
|
||||
struct epoch_context rs_epoch_ctx;
|
||||
};
|
||||
|
||||
CK_LIST_HEAD(head_tcp_rate_set, tcp_rate_set);
|
||||
|
||||
/* Request flags */
|
||||
#define RS_PACING_EXACT_MATCH 0x0001 /* Need an exact match for rate */
|
||||
#define RS_PACING_GT 0x0002 /* Greater than requested */
|
||||
#define RS_PACING_GEQ 0x0004 /* Greater than or equal too */
|
||||
#define RS_PACING_LT 0x0008 /* Less than requested rate */
|
||||
#define RS_PACING_SUB_OK 0x0010 /* If a rate can't be found get the
|
||||
* next best rate (highest or lowest). */
|
||||
/*
 * Forward declarations, so that the prototypes and stubs below name
 * file-scope types even if the including file has not declared them.
 */
struct ifnet;
struct tcpcb;

/*
 * Hardware pacing entry points.
 *
 * A kernel built with RATELIMIT gets prototypes for the real
 * implementation.  A kernel built without RATELIMIT gets inline stubs
 * that report EOPNOTSUPP, so callers compile either way.  (With the
 * stubs hanging off the #else of a nested _KERNEL test they were
 * never visible to a kernel without RATELIMIT.)  The stubs are also
 * kept for the RATELIMIT-without-_KERNEL case, which always had them.
 * With neither macro defined nothing is declared.
 */
#if defined(_KERNEL) && defined(RATELIMIT)
/*
 * Undefine this if you don't want detailed rates to appear in
 * net.inet.tcp.rl.  With the definition each rate shows up in your
 * sysctl tree; this can be big.
 */
#define DETAILED_RATELIMIT_SYSCTL 1

const struct tcp_hwrate_limit_table *
tcp_set_pacing_rate(struct tcpcb *tp, struct ifnet *ifp,
    uint64_t bytes_per_sec, int flags, int *error);

const struct tcp_hwrate_limit_table *
tcp_chg_pacing_rate(const struct tcp_hwrate_limit_table *crte,
    struct tcpcb *tp, struct ifnet *ifp,
    uint64_t bytes_per_sec, int flags, int *error);

void
tcp_rel_pacing_rate(const struct tcp_hwrate_limit_table *crte,
    struct tcpcb *tp);

#elif defined(_KERNEL) || defined(RATELIMIT)

/*
 * Stub: no rate is ever set.  Stores EOPNOTSUPP through "error" when
 * it is not NULL and returns NULL.
 */
static inline const struct tcp_hwrate_limit_table *
tcp_set_pacing_rate(struct tcpcb *tp, struct ifnet *ifp,
    uint64_t bytes_per_sec, int flags, int *error)
{
	if (error != NULL)
		*error = EOPNOTSUPP;
	return (NULL);
}

/*
 * Stub: no rate is ever changed.  Stores EOPNOTSUPP through "error"
 * when it is not NULL and returns NULL.
 */
static inline const struct tcp_hwrate_limit_table *
tcp_chg_pacing_rate(const struct tcp_hwrate_limit_table *crte,
    struct tcpcb *tp, struct ifnet *ifp,
    uint64_t bytes_per_sec, int flags, int *error)
{
	if (error != NULL)
		*error = EOPNOTSUPP;
	return (NULL);
}

/*
 * Stub: there is nothing to release.
 */
static inline void
tcp_rel_pacing_rate(const struct tcp_hwrate_limit_table *crte,
    struct tcpcb *tp)
{
}

#endif
|
||||
#endif
|
Loading…
x
Reference in New Issue
Block a user