Opps committed the wrong ratelimit version in the
whitespace cleanup.. Restore it to the proper version. Sponsored by: Netfilx Inc.
This commit is contained in:
parent
9dd4070571
commit
3d80d57562
@ -49,11 +49,9 @@ __FBSDID("$FreeBSD$");
|
|||||||
#include <sys/eventhandler.h>
|
#include <sys/eventhandler.h>
|
||||||
#include <sys/mutex.h>
|
#include <sys/mutex.h>
|
||||||
#include <sys/ck.h>
|
#include <sys/ck.h>
|
||||||
#include <net/if.h>
|
#define TCPSTATES /* for logging */
|
||||||
#include <net/if_var.h>
|
|
||||||
#include <netinet/in.h>
|
#include <netinet/in.h>
|
||||||
#include <netinet/in_pcb.h>
|
#include <netinet/in_pcb.h>
|
||||||
#define TCPSTATES /* for logging */
|
|
||||||
#include <netinet/tcp_var.h>
|
#include <netinet/tcp_var.h>
|
||||||
#ifdef INET6
|
#ifdef INET6
|
||||||
#include <netinet6/tcp6_var.h>
|
#include <netinet6/tcp6_var.h>
|
||||||
@ -66,186 +64,45 @@ __FBSDID("$FreeBSD$");
|
|||||||
* For the purposes of each send, what is the size
|
* For the purposes of each send, what is the size
|
||||||
* of an ethernet frame.
|
* of an ethernet frame.
|
||||||
*/
|
*/
|
||||||
|
#ifndef ETHERNET_SEGMENT_SIZE
|
||||||
|
#define ETHERNET_SEGMENT_SIZE 1500
|
||||||
|
#endif
|
||||||
MALLOC_DEFINE(M_TCPPACE, "tcp_hwpace", "TCP Hardware pacing memory");
|
MALLOC_DEFINE(M_TCPPACE, "tcp_hwpace", "TCP Hardware pacing memory");
|
||||||
#ifdef RATELIMIT
|
#ifdef RATELIMIT
|
||||||
|
|
||||||
/*
|
|
||||||
* The following preferred table will seem weird to
|
|
||||||
* the casual viewer. Why do we not have any rates below
|
|
||||||
* 1Mbps? Why do we have a rate at 1.44Mbps called common?
|
|
||||||
* Why do the rates cluster in the 1-100Mbps range more
|
|
||||||
* than others? Why does the table jump around at the beginnign
|
|
||||||
* and then be more consistently raising?
|
|
||||||
*
|
|
||||||
* Let me try to answer those questions. A lot of
|
|
||||||
* this is dependant on the hardware. We have three basic
|
|
||||||
* supporters of rate limiting
|
|
||||||
*
|
|
||||||
* Chelsio - Supporting 16 configurable rates.
|
|
||||||
* Mlx - c4 supporting 13 fixed rates.
|
|
||||||
* Mlx - c5 & c6 supporting 127 configurable rates.
|
|
||||||
*
|
|
||||||
* The c4 is why we have a common rate that is available
|
|
||||||
* in all rate tables. This is a selected rate from the
|
|
||||||
* c4 table and we assure its available in all ratelimit
|
|
||||||
* tables. This way the tcp_ratelimit code has an assured
|
|
||||||
* rate it should always be able to get. This answers a
|
|
||||||
* couple of the questions above.
|
|
||||||
*
|
|
||||||
* So what about the rest, well the table is built to
|
|
||||||
* try to get the most out of a joint hardware/software
|
|
||||||
* pacing system. The software pacer will always pick
|
|
||||||
* a rate higher than the b/w that it is estimating
|
|
||||||
*
|
|
||||||
* on the path. This is done for two reasons.
|
|
||||||
* a) So we can discover more b/w
|
|
||||||
* and
|
|
||||||
* b) So we can send a block of MSS's down and then
|
|
||||||
* have the software timer go off after the previous
|
|
||||||
* send is completely out of the hardware.
|
|
||||||
*
|
|
||||||
* But when we do <b> we don't want to have the delay
|
|
||||||
* between the last packet sent by the hardware be
|
|
||||||
* excessively long (to reach our desired rate).
|
|
||||||
*
|
|
||||||
* So let me give an example for clarity.
|
|
||||||
*
|
|
||||||
* Lets assume that the tcp stack sees that 29,110,000 bps is
|
|
||||||
* what the bw of the path is. The stack would select the
|
|
||||||
* rate 31Mbps. 31Mbps means that each send that is done
|
|
||||||
* by the hardware will cause a 387 micro-second gap between
|
|
||||||
* the pacets sent at that rate. For 29,110,000 bps we
|
|
||||||
* would need 412 micro-seconds gap between each send.
|
|
||||||
*
|
|
||||||
* Now we pick a MSS size based on the delta between the
|
|
||||||
* two rates (412 - 387) divided into the rate we really
|
|
||||||
* wish to send at rounded up. That results in a MSS
|
|
||||||
* send of 17 mss's at once. The hardware then will
|
|
||||||
* run out of data in a single 17MSS send in 6,579 micro-seconds.
|
|
||||||
* On the other hand the software pacer will send more data
|
|
||||||
* in 7,004 micro-seconds. This means that we will refill
|
|
||||||
* the hardware 25 microseconds after it would have sent
|
|
||||||
* next. This is a win since we no are only sending every
|
|
||||||
* 7ms or so and yet all the packets are spaced on
|
|
||||||
* the wire with 94% of what they should be and only
|
|
||||||
* the last packet is delayed extra to make up for the
|
|
||||||
* difference. Note that the above formula has two
|
|
||||||
* important caveat. If we are above (b/w wise) over
|
|
||||||
* 100Mbps we double the result of the MSS calculation.
|
|
||||||
* The second caveat is if we are 500Mbps or more
|
|
||||||
* we just send the maximum MSS at once i.e. 45MSS
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#define COMMON_RATE 180500
|
#define COMMON_RATE 180500
|
||||||
uint64_t desired_rates[] = {
|
uint64_t desired_rates[] = {
|
||||||
122500, /* 1Mbps - rate 1 */
|
62500, /* 500Kbps */
|
||||||
180500, /* 1.44Mpbs - rate 2 common rate */
|
180500, /* 1.44Mpbs */
|
||||||
375000, /* 3Mbps - rate 3 */
|
375000, /* 3Mbps */
|
||||||
625000, /* 5Mbps - rate 4 */
|
500000, /* 4Mbps */
|
||||||
875000, /* 7Mbps - rate 5 */
|
625000, /* 5Mbps */
|
||||||
1125000, /* 9Mbps - rate 6 */
|
750000, /* 6Mbps */
|
||||||
1375000, /* 11Mbps - rate 7 */
|
1000000, /* 8Mbps */
|
||||||
1625000, /* 13Mbps - rate 8 */
|
1250000, /* 10Mbps */
|
||||||
2625000, /* 21Mbps - rate 9 */
|
2500000, /* 20Mbps */
|
||||||
3875000, /* 31Mbps - rate 10 */
|
3750000, /* 30Mbps */
|
||||||
5125000, /* 41Meg - rate 11 */
|
5000000, /* 40Meg */
|
||||||
12500000, /* 100Mbps - rate 12 */
|
6250000, /* 50Mbps */
|
||||||
25000000, /* 200Mbps - rate 13 */
|
12500000, /* 100Mbps */
|
||||||
50000000, /* 400Mbps - rate 14 */
|
25000000, /* 200Mbps */
|
||||||
63750000, /* 51Mbps - rate 15 */
|
50000000, /* 400Mbps */
|
||||||
100000000, /* 800Mbps - rate 16 */
|
100000000, /* 800Mbps */
|
||||||
1875000, /* 15Mbps - rate 17 */
|
12500, /* 100kbps */
|
||||||
2125000, /* 17Mbps - rate 18 */
|
25000, /* 200kbps */
|
||||||
2375000, /* 19Mbps - rate 19 */
|
875000, /* 7Mbps */
|
||||||
2875000, /* 23Mbps - rate 20 */
|
1125000, /* 9Mbps */
|
||||||
3125000, /* 25Mbps - rate 21 */
|
1875000, /* 15Mbps */
|
||||||
3375000, /* 27Mbps - rate 22 */
|
3125000, /* 25Mbps */
|
||||||
3625000, /* 29Mbps - rate 23 */
|
8125000, /* 65Mbps */
|
||||||
4125000, /* 33Mbps - rate 24 */
|
10000000, /* 80Mbps */
|
||||||
4375000, /* 35Mbps - rate 25 */
|
18750000, /* 150Mbps */
|
||||||
4625000, /* 37Mbps - rate 26 */
|
20000000, /* 250Mbps */
|
||||||
4875000, /* 39Mbps - rate 27 */
|
37500000, /* 350Mbps */
|
||||||
5375000, /* 43Mbps - rate 28 */
|
62500000, /* 500Mbps */
|
||||||
5625000, /* 45Mbps - rate 29 */
|
78125000, /* 625Mbps */
|
||||||
5875000, /* 47Mbps - rate 30 */
|
125000000, /* 1Gbps */
|
||||||
6125000, /* 49Mbps - rate 31 */
|
|
||||||
6625000, /* 53Mbps - rate 32 */
|
|
||||||
6875000, /* 55Mbps - rate 33 */
|
|
||||||
7125000, /* 57Mbps - rate 34 */
|
|
||||||
7375000, /* 59Mbps - rate 35 */
|
|
||||||
7625000, /* 61Mbps - rate 36 */
|
|
||||||
7875000, /* 63Mbps - rate 37 */
|
|
||||||
8125000, /* 65Mbps - rate 38 */
|
|
||||||
8375000, /* 67Mbps - rate 39 */
|
|
||||||
8625000, /* 69Mbps - rate 40 */
|
|
||||||
8875000, /* 71Mbps - rate 41 */
|
|
||||||
9125000, /* 73Mbps - rate 42 */
|
|
||||||
9375000, /* 75Mbps - rate 43 */
|
|
||||||
9625000, /* 77Mbps - rate 44 */
|
|
||||||
9875000, /* 79Mbps - rate 45 */
|
|
||||||
10125000, /* 81Mbps - rate 46 */
|
|
||||||
10375000, /* 83Mbps - rate 47 */
|
|
||||||
10625000, /* 85Mbps - rate 48 */
|
|
||||||
10875000, /* 87Mbps - rate 49 */
|
|
||||||
11125000, /* 89Mbps - rate 50 */
|
|
||||||
11375000, /* 91Mbps - rate 51 */
|
|
||||||
11625000, /* 93Mbps - rate 52 */
|
|
||||||
11875000, /* 95Mbps - rate 53 */
|
|
||||||
13125000, /* 105Mbps - rate 54 */
|
|
||||||
13750000, /* 110Mbps - rate 55 */
|
|
||||||
14375000, /* 115Mbps - rate 56 */
|
|
||||||
15000000, /* 120Mbps - rate 57 */
|
|
||||||
15625000, /* 125Mbps - rate 58 */
|
|
||||||
16250000, /* 130Mbps - rate 59 */
|
|
||||||
16875000, /* 135Mbps - rate 60 */
|
|
||||||
17500000, /* 140Mbps - rate 61 */
|
|
||||||
18125000, /* 145Mbps - rate 62 */
|
|
||||||
18750000, /* 150Mbps - rate 64 */
|
|
||||||
20000000, /* 160Mbps - rate 65 */
|
|
||||||
21250000, /* 170Mbps - rate 66 */
|
|
||||||
22500000, /* 180Mbps - rate 67 */
|
|
||||||
23750000, /* 190Mbps - rate 68 */
|
|
||||||
26250000, /* 210Mbps - rate 69 */
|
|
||||||
27500000, /* 220Mbps - rate 70 */
|
|
||||||
28750000, /* 230Mbps - rate 71 */
|
|
||||||
30000000, /* 240Mbps - rate 72 */
|
|
||||||
31250000, /* 250Mbps - rate 73 */
|
|
||||||
34375000, /* 275Mbps - rate 74 */
|
|
||||||
37500000, /* 300Mbps - rate 75 */
|
|
||||||
40625000, /* 325Mbps - rate 76 */
|
|
||||||
43750000, /* 350Mbps - rate 77 */
|
|
||||||
46875000, /* 375Mbps - rate 78 */
|
|
||||||
53125000, /* 425Mbps - rate 79 */
|
|
||||||
56250000, /* 450Mbps - rate 80 */
|
|
||||||
59375000, /* 475Mbps - rate 81 */
|
|
||||||
62500000, /* 500Mbps - rate 82 */
|
|
||||||
68750000, /* 550Mbps - rate 83 */
|
|
||||||
75000000, /* 600Mbps - rate 84 */
|
|
||||||
81250000, /* 650Mbps - rate 85 */
|
|
||||||
87500000, /* 700Mbps - rate 86 */
|
|
||||||
93750000, /* 750Mbps - rate 87 */
|
|
||||||
106250000, /* 850Mbps - rate 88 */
|
|
||||||
112500000, /* 900Mbps - rate 89 */
|
|
||||||
125000000, /* 1Gbps - rate 90 */
|
|
||||||
156250000, /* 1.25Gps - rate 91 */
|
|
||||||
187500000, /* 1.5Gps - rate 92 */
|
|
||||||
218750000, /* 1.75Gps - rate 93 */
|
|
||||||
250000000, /* 2Gbps - rate 94 */
|
|
||||||
281250000, /* 2.25Gps - rate 95 */
|
|
||||||
312500000, /* 2.5Gbps - rate 96 */
|
|
||||||
343750000, /* 2.75Gbps - rate 97 */
|
|
||||||
375000000, /* 3Gbps - rate 98 */
|
|
||||||
500000000, /* 4Gbps - rate 99 */
|
|
||||||
625000000, /* 5Gbps - rate 100 */
|
|
||||||
750000000, /* 6Gbps - rate 101 */
|
|
||||||
875000000, /* 7Gbps - rate 102 */
|
|
||||||
1000000000, /* 8Gbps - rate 103 */
|
|
||||||
1125000000, /* 9Gbps - rate 104 */
|
|
||||||
1250000000, /* 10Gbps - rate 105 */
|
|
||||||
1875000000, /* 15Gbps - rate 106 */
|
|
||||||
2500000000 /* 20Gbps - rate 107 */
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#define MAX_HDWR_RATES (sizeof(desired_rates)/sizeof(uint64_t))
|
#define MAX_HDWR_RATES (sizeof(desired_rates)/sizeof(uint64_t))
|
||||||
#define RS_ORDERED_COUNT 16 /*
|
#define RS_ORDERED_COUNT 16 /*
|
||||||
* Number that are in order
|
* Number that are in order
|
||||||
@ -427,7 +284,7 @@ rs_defer_destroy(struct tcp_rate_set *rs)
|
|||||||
|
|
||||||
/* Set flag to only defer once. */
|
/* Set flag to only defer once. */
|
||||||
rs->rs_flags |= RS_FUNERAL_SCHD;
|
rs->rs_flags |= RS_FUNERAL_SCHD;
|
||||||
NET_EPOCH_CALL(rs_destroy, &rs->rs_epoch_ctx);
|
epoch_call(net_epoch, &rs->rs_epoch_ctx, rs_destroy);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef INET
|
#ifdef INET
|
||||||
@ -522,24 +379,16 @@ rt_setup_new_rs(struct ifnet *ifp, int *error)
|
|||||||
* We can do nothing if we cannot
|
* We can do nothing if we cannot
|
||||||
* get a query back from the driver.
|
* get a query back from the driver.
|
||||||
*/
|
*/
|
||||||
printf("No query functions for %s:%d-- failed\n",
|
|
||||||
ifp->if_dname, ifp->if_dunit);
|
|
||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
rs = malloc(sizeof(struct tcp_rate_set), M_TCPPACE, M_NOWAIT | M_ZERO);
|
rs = malloc(sizeof(struct tcp_rate_set), M_TCPPACE, M_NOWAIT | M_ZERO);
|
||||||
if (rs == NULL) {
|
if (rs == NULL) {
|
||||||
if (error)
|
if (error)
|
||||||
*error = ENOMEM;
|
*error = ENOMEM;
|
||||||
printf("No memory for malloc\n");
|
|
||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
memset(&rl, 0, sizeof(rl));
|
|
||||||
rl.flags = RT_NOSUPPORT;
|
rl.flags = RT_NOSUPPORT;
|
||||||
ifp->if_ratelimit_query(ifp, &rl);
|
ifp->if_ratelimit_query(ifp, &rl);
|
||||||
printf("if:%s:%d responds with flags:0x%x rate count:%d\n",
|
|
||||||
ifp->if_dname,
|
|
||||||
ifp->if_dunit,
|
|
||||||
rl.flags, rl.number_of_rates);
|
|
||||||
if (rl.flags & RT_IS_UNUSABLE) {
|
if (rl.flags & RT_IS_UNUSABLE) {
|
||||||
/*
|
/*
|
||||||
* The interface does not really support
|
* The interface does not really support
|
||||||
@ -582,7 +431,7 @@ rt_setup_new_rs(struct ifnet *ifp, int *error)
|
|||||||
mtx_unlock(&rs_mtx);
|
mtx_unlock(&rs_mtx);
|
||||||
return (rs);
|
return (rs);
|
||||||
} else if ((rl.flags & RT_IS_FIXED_TABLE) == RT_IS_FIXED_TABLE) {
|
} else if ((rl.flags & RT_IS_FIXED_TABLE) == RT_IS_FIXED_TABLE) {
|
||||||
/* Mellanox C4 likely */
|
/* Mellanox most likely */
|
||||||
rs->rs_ifp = ifp;
|
rs->rs_ifp = ifp;
|
||||||
rs->rs_if_dunit = ifp->if_dunit;
|
rs->rs_if_dunit = ifp->if_dunit;
|
||||||
rs->rs_rate_cnt = rl.number_of_rates;
|
rs->rs_rate_cnt = rl.number_of_rates;
|
||||||
@ -593,7 +442,7 @@ rt_setup_new_rs(struct ifnet *ifp, int *error)
|
|||||||
rs->rs_disable = 0;
|
rs->rs_disable = 0;
|
||||||
rate_table_act = rl.rate_table;
|
rate_table_act = rl.rate_table;
|
||||||
} else if ((rl.flags & RT_IS_SELECTABLE) == RT_IS_SELECTABLE) {
|
} else if ((rl.flags & RT_IS_SELECTABLE) == RT_IS_SELECTABLE) {
|
||||||
/* Chelsio, C5 and C6 of Mellanox? */
|
/* Chelsio */
|
||||||
rs->rs_ifp = ifp;
|
rs->rs_ifp = ifp;
|
||||||
rs->rs_if_dunit = ifp->if_dunit;
|
rs->rs_if_dunit = ifp->if_dunit;
|
||||||
rs->rs_rate_cnt = rl.number_of_rates;
|
rs->rs_rate_cnt = rl.number_of_rates;
|
||||||
@ -685,14 +534,6 @@ bail:
|
|||||||
rs->rs_lowest_valid = i;
|
rs->rs_lowest_valid = i;
|
||||||
} else {
|
} else {
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
if ((rl.flags & RT_IS_SETUP_REQ) &&
|
|
||||||
(ifp->if_ratelimit_query)) {
|
|
||||||
err = ifp->if_ratelimit_setup(ifp,
|
|
||||||
rs->rs_rlt[i].rate, i);
|
|
||||||
if (err)
|
|
||||||
goto handle_err;
|
|
||||||
}
|
|
||||||
#ifdef RSS
|
#ifdef RSS
|
||||||
hash_type = M_HASHTYPE_RSS_TCP_IPV4;
|
hash_type = M_HASHTYPE_RSS_TCP_IPV4;
|
||||||
#else
|
#else
|
||||||
@ -704,7 +545,6 @@ bail:
|
|||||||
rs->rs_rlt[i].rate,
|
rs->rs_rlt[i].rate,
|
||||||
&rs->rs_rlt[i].tag);
|
&rs->rs_rlt[i].tag);
|
||||||
if (err) {
|
if (err) {
|
||||||
handle_err:
|
|
||||||
if (i == (rs->rs_rate_cnt - 1)) {
|
if (i == (rs->rs_rate_cnt - 1)) {
|
||||||
/*
|
/*
|
||||||
* Huh - first rate and we can't get
|
* Huh - first rate and we can't get
|
||||||
@ -1038,7 +878,7 @@ rt_setup_rate(struct inpcb *inp, struct ifnet *ifp, uint64_t bytes_per_sec,
|
|||||||
struct epoch_tracker et;
|
struct epoch_tracker et;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
NET_EPOCH_ENTER(et);
|
epoch_enter_preempt(net_epoch_preempt, &et);
|
||||||
use_real_interface:
|
use_real_interface:
|
||||||
CK_LIST_FOREACH(rs, &int_rs, next) {
|
CK_LIST_FOREACH(rs, &int_rs, next) {
|
||||||
/*
|
/*
|
||||||
@ -1071,14 +911,14 @@ use_real_interface:
|
|||||||
*/
|
*/
|
||||||
if (rs->rs_disable && error)
|
if (rs->rs_disable && error)
|
||||||
*error = ENODEV;
|
*error = ENODEV;
|
||||||
NET_EPOCH_EXIT(et);
|
epoch_exit_preempt(net_epoch_preempt, &et);
|
||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((rs == NULL) || (rs->rs_disable != 0)) {
|
if ((rs == NULL) || (rs->rs_disable != 0)) {
|
||||||
if (rs->rs_disable && error)
|
if (rs->rs_disable && error)
|
||||||
*error = ENOSPC;
|
*error = ENOSPC;
|
||||||
NET_EPOCH_EXIT(et);
|
epoch_exit_preempt(net_epoch_preempt, &et);
|
||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
if (rs->rs_flags & RS_IS_DEFF) {
|
if (rs->rs_flags & RS_IS_DEFF) {
|
||||||
@ -1089,7 +929,7 @@ use_real_interface:
|
|||||||
if (tifp == NULL) {
|
if (tifp == NULL) {
|
||||||
if (rs->rs_disable && error)
|
if (rs->rs_disable && error)
|
||||||
*error = ENOTSUP;
|
*error = ENOTSUP;
|
||||||
NET_EPOCH_EXIT(et);
|
epoch_exit_preempt(net_epoch_preempt, &et);
|
||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
goto use_real_interface;
|
goto use_real_interface;
|
||||||
@ -1098,7 +938,7 @@ use_real_interface:
|
|||||||
((rs->rs_flows_using + 1) > rs->rs_flow_limit)) {
|
((rs->rs_flows_using + 1) > rs->rs_flow_limit)) {
|
||||||
if (error)
|
if (error)
|
||||||
*error = ENOSPC;
|
*error = ENOSPC;
|
||||||
NET_EPOCH_EXIT(et);
|
epoch_exit_preempt(net_epoch_preempt, &et);
|
||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
rte = tcp_find_suitable_rate(rs, bytes_per_sec, flags);
|
rte = tcp_find_suitable_rate(rs, bytes_per_sec, flags);
|
||||||
@ -1122,7 +962,7 @@ use_real_interface:
|
|||||||
*/
|
*/
|
||||||
atomic_add_64(&rs->rs_flows_using, 1);
|
atomic_add_64(&rs->rs_flows_using, 1);
|
||||||
}
|
}
|
||||||
NET_EPOCH_EXIT(et);
|
epoch_exit_preempt(net_epoch_preempt, &et);
|
||||||
return (rte);
|
return (rte);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1138,22 +978,13 @@ tcp_rl_ifnet_link(void *arg __unused, struct ifnet *ifp, int link_state)
|
|||||||
* We only care on an interface going up that is rate-limit
|
* We only care on an interface going up that is rate-limit
|
||||||
* capable.
|
* capable.
|
||||||
*/
|
*/
|
||||||
printf("ifp:%s.%d does not support rate-limit(0x%x) or link_state is not UP(state:%d)\n",
|
|
||||||
ifp->if_dname,
|
|
||||||
ifp->if_dunit,
|
|
||||||
ifp->if_capabilities,
|
|
||||||
link_state);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
mtx_lock(&rs_mtx);
|
mtx_lock(&rs_mtx);
|
||||||
printf("Link UP on interface %s.%d\n",
|
|
||||||
ifp->if_dname,
|
|
||||||
ifp->if_dunit);
|
|
||||||
CK_LIST_FOREACH(rs, &int_rs, next) {
|
CK_LIST_FOREACH(rs, &int_rs, next) {
|
||||||
if ((rs->rs_ifp == ifp) &&
|
if ((rs->rs_ifp == ifp) &&
|
||||||
(rs->rs_if_dunit == ifp->if_dunit)) {
|
(rs->rs_if_dunit == ifp->if_dunit)) {
|
||||||
/* We already have initialized this guy */
|
/* We already have initialized this guy */
|
||||||
printf("Interface already initialized\n");
|
|
||||||
mtx_unlock(&rs_mtx);
|
mtx_unlock(&rs_mtx);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -1254,7 +1085,6 @@ tcp_set_pacing_rate(struct tcpcb *tp, struct ifnet *ifp,
|
|||||||
*error = EINVAL;
|
*error = EINVAL;
|
||||||
rte = NULL;
|
rte = NULL;
|
||||||
}
|
}
|
||||||
*error = 0;
|
|
||||||
return (rte);
|
return (rte);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1364,112 +1194,6 @@ tcp_rel_pacing_rate(const struct tcp_hwrate_limit_table *crte, struct tcpcb *tp)
|
|||||||
in_pcbdetach_txrtlmt(tp->t_inpcb);
|
in_pcbdetach_txrtlmt(tp->t_inpcb);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define ONE_POINT_TWO_MEG 150000 /* 1.2 megabits in bytes */
|
|
||||||
#define ONE_HUNDRED_MBPS 12500000 /* 100Mbps in bytes per second */
|
|
||||||
#define FIVE_HUNDRED_MBPS 62500000 /* 500Mbps in bytes per second */
|
|
||||||
#define MAX_MSS_SENT 43 /* 43 mss = 43 x 1500 = 64,500 bytes */
|
|
||||||
|
|
||||||
uint32_t
|
|
||||||
tcp_get_pacing_mss(uint64_t bw, uint32_t segsiz, int can_use_1mss,
|
|
||||||
const struct tcp_hwrate_limit_table *te)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* We use the google formula to calculate the
|
|
||||||
* TSO size. I.E.
|
|
||||||
* bw < 24Meg
|
|
||||||
* tso = 2mss
|
|
||||||
* else
|
|
||||||
* tso = min(bw/1000, 64k)
|
|
||||||
*
|
|
||||||
* Note for these calculations we ignore the
|
|
||||||
* packet overhead (enet hdr, ip hdr and tcp hdr).
|
|
||||||
*/
|
|
||||||
uint64_t lentim, res, bytes;
|
|
||||||
uint32_t new_tso, min_tso_segs;
|
|
||||||
|
|
||||||
bytes = bw / 1000;
|
|
||||||
if (bytes > (64 * 1000))
|
|
||||||
bytes = 64 * 1000;
|
|
||||||
/* Round up */
|
|
||||||
new_tso = (bytes + segsiz - 1) / segsiz;
|
|
||||||
if (can_use_1mss && (bw < ONE_POINT_TWO_MEG))
|
|
||||||
min_tso_segs = 1;
|
|
||||||
else
|
|
||||||
min_tso_segs = 2;
|
|
||||||
if (new_tso < min_tso_segs)
|
|
||||||
new_tso = min_tso_segs;
|
|
||||||
if (new_tso > MAX_MSS_SENT)
|
|
||||||
new_tso = MAX_MSS_SENT;
|
|
||||||
new_tso *= segsiz;
|
|
||||||
/*
|
|
||||||
* If we are not doing hardware pacing
|
|
||||||
* then we are done.
|
|
||||||
*/
|
|
||||||
if (te == NULL)
|
|
||||||
return(new_tso);
|
|
||||||
/*
|
|
||||||
* For hardware pacing we look at the
|
|
||||||
* rate you are sending at and compare
|
|
||||||
* that to the rate you have in hardware.
|
|
||||||
*
|
|
||||||
* If the hardware rate is slower than your
|
|
||||||
* software rate then you are in error and
|
|
||||||
* we will build a queue in our hardware whic
|
|
||||||
* is probably not desired, in such a case
|
|
||||||
* just return the non-hardware TSO size.
|
|
||||||
*
|
|
||||||
* If the rate in hardware is faster (which
|
|
||||||
* it should be) then look at how long it
|
|
||||||
* takes to send one ethernet segment size at
|
|
||||||
* your b/w and compare that to the time it
|
|
||||||
* takes to send at the rate you had selected.
|
|
||||||
*
|
|
||||||
* If your time is greater (which we hope it is)
|
|
||||||
* we get the delta between the two, and then
|
|
||||||
* divide that into your pacing time. This tells
|
|
||||||
* us how many MSS you can send down at once (rounded up).
|
|
||||||
*
|
|
||||||
* Note we also double this value if the b/w is over
|
|
||||||
* 100Mbps. If its over 500meg we just set you to the
|
|
||||||
* max (43 segments).
|
|
||||||
*/
|
|
||||||
if (te->rate > FIVE_HUNDRED_MBPS)
|
|
||||||
return (segsiz * MAX_MSS_SENT);
|
|
||||||
if (te->rate == bw) {
|
|
||||||
/* We are pacing at exactly the hdwr rate */
|
|
||||||
return (segsiz * MAX_MSS_SENT);
|
|
||||||
}
|
|
||||||
lentim = ETHERNET_SEGMENT_SIZE * USECS_IN_SECOND;
|
|
||||||
res = lentim / bw;
|
|
||||||
if (res > te->time_between) {
|
|
||||||
uint32_t delta, segs;
|
|
||||||
|
|
||||||
delta = res - te->time_between;
|
|
||||||
segs = (res + delta - 1)/delta;
|
|
||||||
if (te->rate > ONE_HUNDRED_MBPS)
|
|
||||||
segs *= 2;
|
|
||||||
if (segs < min_tso_segs)
|
|
||||||
segs = min_tso_segs;
|
|
||||||
if (segs > MAX_MSS_SENT)
|
|
||||||
segs = MAX_MSS_SENT;
|
|
||||||
segs *= segsiz;
|
|
||||||
if (segs < new_tso) {
|
|
||||||
/* unexpected ? */
|
|
||||||
return(new_tso);
|
|
||||||
} else {
|
|
||||||
return (segs);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
/*
|
|
||||||
* Your time is smaller which means
|
|
||||||
* we will grow a queue on our
|
|
||||||
* hardware. Send back the non-hardware
|
|
||||||
* rate.
|
|
||||||
*/
|
|
||||||
return (new_tso);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static eventhandler_tag rl_ifnet_departs;
|
static eventhandler_tag rl_ifnet_departs;
|
||||||
static eventhandler_tag rl_ifnet_arrives;
|
static eventhandler_tag rl_ifnet_arrives;
|
||||||
static eventhandler_tag rl_shutdown_start;
|
static eventhandler_tag rl_shutdown_start;
|
||||||
|
@ -88,9 +88,6 @@ CK_LIST_HEAD(head_tcp_rate_set, tcp_rate_set);
|
|||||||
#define RS_PACING_SUB_OK 0x0010 /* If a rate can't be found get the
|
#define RS_PACING_SUB_OK 0x0010 /* If a rate can't be found get the
|
||||||
* next best rate (highest or lowest). */
|
* next best rate (highest or lowest). */
|
||||||
#ifdef _KERNEL
|
#ifdef _KERNEL
|
||||||
#ifndef ETHERNET_SEGMENT_SIZE
|
|
||||||
#define ETHERNET_SEGMENT_SIZE 1514
|
|
||||||
#endif
|
|
||||||
#ifdef RATELIMIT
|
#ifdef RATELIMIT
|
||||||
#define DETAILED_RATELIMIT_SYSCTL 1 /*
|
#define DETAILED_RATELIMIT_SYSCTL 1 /*
|
||||||
* Undefine this if you don't want
|
* Undefine this if you don't want
|
||||||
@ -138,17 +135,7 @@ tcp_rel_pacing_rate(const struct tcp_hwrate_limit_table *crte,
|
|||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
/*
|
#endif
|
||||||
* Given a b/w and a segsiz, and optional hardware
|
|
||||||
* rate limit, return the ideal size to burst
|
|
||||||
* out at once. Note the parameter can_use_1mss
|
|
||||||
* dictates if the transport will tolerate a 1mss
|
|
||||||
* limit, if not it will bottom out at 2mss (think
|
|
||||||
* delayed ack).
|
|
||||||
*/
|
|
||||||
uint32_t
|
|
||||||
tcp_get_pacing_mss(uint64_t bw, uint32_t segsiz, int can_use_1mss,
|
|
||||||
const struct tcp_hwrate_limit_table *te);
|
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user