ip_mroute: refactor bw_meter API
The API should work as follows: - periodically report Lower-or-EQual (LEQ) bandwidth connections over the kernel socket, if the user application has registered for such per-flow notifications - report Greater-or-EQual (GEQ) bandwidth as soon as it reaches the specified value within the configured time window. The custom implementation of callouts was removed. There is no point in implementing a callout wheel here, as the generic callouts do exactly the same thing. Performance is not critical for such reporting, so the biggest concern should be to have code which can be easily maintained. This is a preparation for a rework of the locking, which is highly inefficient. Approved by: mw Sponsored by: Stormshield Obtained from: Semihalf Differential Revision: https://reviews.freebsd.org/D30210
This commit is contained in:
parent
d80a903a1c
commit
741afc6233
@ -49,6 +49,7 @@
|
|||||||
* Modified by Pavlin Radoslavov, USC/ISI, May 1998, August 1999, October 2000
|
* Modified by Pavlin Radoslavov, USC/ISI, May 1998, August 1999, October 2000
|
||||||
* Modified by Hitoshi Asaeda, WIDE, August 2000
|
* Modified by Hitoshi Asaeda, WIDE, August 2000
|
||||||
* Modified by Pavlin Radoslavov, ICSI, October 2002
|
* Modified by Pavlin Radoslavov, ICSI, October 2002
|
||||||
|
* Modified by Wojciech Macek, Semihalf, May 2021
|
||||||
*
|
*
|
||||||
* MROUTING Revision: 3.5
|
* MROUTING Revision: 3.5
|
||||||
* and PIM-SMv2 and PIM-DM support, advanced API support,
|
* and PIM-SMv2 and PIM-DM support, advanced API support,
|
||||||
@ -202,16 +203,6 @@ VNET_DEFINE_STATIC(struct callout, expire_upcalls_ch);
|
|||||||
* Bandwidth meter variables and constants
|
* Bandwidth meter variables and constants
|
||||||
*/
|
*/
|
||||||
static MALLOC_DEFINE(M_BWMETER, "bwmeter", "multicast upcall bw meters");
|
static MALLOC_DEFINE(M_BWMETER, "bwmeter", "multicast upcall bw meters");
|
||||||
/*
|
|
||||||
* Pending timeouts are stored in a hash table, the key being the
|
|
||||||
* expiration time. Periodically, the entries are analysed and processed.
|
|
||||||
*/
|
|
||||||
#define BW_METER_BUCKETS 1024
|
|
||||||
VNET_DEFINE_STATIC(struct bw_meter **, bw_meter_timers);
|
|
||||||
#define V_bw_meter_timers VNET(bw_meter_timers)
|
|
||||||
VNET_DEFINE_STATIC(struct callout, bw_meter_ch);
|
|
||||||
#define V_bw_meter_ch VNET(bw_meter_ch)
|
|
||||||
#define BW_METER_PERIOD (hz) /* periodical handling of bw meters */
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Pending upcalls are stored in a vector which is flushed when
|
* Pending upcalls are stored in a vector which is flushed when
|
||||||
@ -320,14 +311,13 @@ static int add_mfc(struct mfcctl2 *);
|
|||||||
static int add_vif(struct vifctl *);
|
static int add_vif(struct vifctl *);
|
||||||
static void bw_meter_prepare_upcall(struct bw_meter *, struct timeval *);
|
static void bw_meter_prepare_upcall(struct bw_meter *, struct timeval *);
|
||||||
static void bw_meter_process(void);
|
static void bw_meter_process(void);
|
||||||
static void bw_meter_receive_packet(struct bw_meter *, int,
|
static void bw_meter_geq_receive_packet(struct bw_meter *, int,
|
||||||
struct timeval *);
|
struct timeval *);
|
||||||
static void bw_upcalls_send(void);
|
static void bw_upcalls_send(void);
|
||||||
static int del_bw_upcall(struct bw_upcall *);
|
static int del_bw_upcall(struct bw_upcall *);
|
||||||
static int del_mfc(struct mfcctl2 *);
|
static int del_mfc(struct mfcctl2 *);
|
||||||
static int del_vif(vifi_t);
|
static int del_vif(vifi_t);
|
||||||
static int del_vif_locked(vifi_t);
|
static int del_vif_locked(vifi_t);
|
||||||
static void expire_bw_meter_process(void *);
|
|
||||||
static void expire_bw_upcalls_send(void *);
|
static void expire_bw_upcalls_send(void *);
|
||||||
static void expire_mfc(struct mfc *);
|
static void expire_mfc(struct mfc *);
|
||||||
static void expire_upcalls(void *);
|
static void expire_upcalls(void *);
|
||||||
@ -685,8 +675,6 @@ ip_mrouter_init(struct socket *so, int version)
|
|||||||
curvnet);
|
curvnet);
|
||||||
callout_reset(&V_bw_upcalls_ch, BW_UPCALLS_PERIOD, expire_bw_upcalls_send,
|
callout_reset(&V_bw_upcalls_ch, BW_UPCALLS_PERIOD, expire_bw_upcalls_send,
|
||||||
curvnet);
|
curvnet);
|
||||||
callout_reset(&V_bw_meter_ch, BW_METER_PERIOD, expire_bw_meter_process,
|
|
||||||
curvnet);
|
|
||||||
|
|
||||||
V_ip_mrouter = so;
|
V_ip_mrouter = so;
|
||||||
ip_mrouter_cnt++;
|
ip_mrouter_cnt++;
|
||||||
@ -745,7 +733,6 @@ X_ip_mrouter_done(void)
|
|||||||
|
|
||||||
callout_stop(&V_expire_upcalls_ch);
|
callout_stop(&V_expire_upcalls_ch);
|
||||||
callout_stop(&V_bw_upcalls_ch);
|
callout_stop(&V_bw_upcalls_ch);
|
||||||
callout_stop(&V_bw_meter_ch);
|
|
||||||
|
|
||||||
MFC_LOCK();
|
MFC_LOCK();
|
||||||
|
|
||||||
@ -766,7 +753,6 @@ X_ip_mrouter_done(void)
|
|||||||
bzero(V_nexpire, sizeof(V_nexpire[0]) * mfchashsize);
|
bzero(V_nexpire, sizeof(V_nexpire[0]) * mfchashsize);
|
||||||
|
|
||||||
V_bw_upcalls_n = 0;
|
V_bw_upcalls_n = 0;
|
||||||
bzero(V_bw_meter_timers, BW_METER_BUCKETS * sizeof(*V_bw_meter_timers));
|
|
||||||
|
|
||||||
MFC_UNLOCK();
|
MFC_UNLOCK();
|
||||||
|
|
||||||
@ -1036,7 +1022,8 @@ expire_mfc(struct mfc *rt)
|
|||||||
|
|
||||||
MFC_LOCK_ASSERT();
|
MFC_LOCK_ASSERT();
|
||||||
|
|
||||||
free_bw_list(rt->mfc_bw_meter);
|
free_bw_list(rt->mfc_bw_meter_leq);
|
||||||
|
free_bw_list(rt->mfc_bw_meter_geq);
|
||||||
|
|
||||||
TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) {
|
TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) {
|
||||||
m_freem(rte->m);
|
m_freem(rte->m);
|
||||||
@ -1139,7 +1126,8 @@ add_mfc(struct mfcctl2 *mfccp)
|
|||||||
rt->mfc_nstall = 0;
|
rt->mfc_nstall = 0;
|
||||||
|
|
||||||
rt->mfc_expire = 0;
|
rt->mfc_expire = 0;
|
||||||
rt->mfc_bw_meter = NULL;
|
rt->mfc_bw_meter_leq = NULL;
|
||||||
|
rt->mfc_bw_meter_geq = NULL;
|
||||||
|
|
||||||
/* insert new entry at head of hash chain */
|
/* insert new entry at head of hash chain */
|
||||||
LIST_INSERT_HEAD(&V_mfchashtbl[hash], rt, mfc_hash);
|
LIST_INSERT_HEAD(&V_mfchashtbl[hash], rt, mfc_hash);
|
||||||
@ -1179,8 +1167,10 @@ del_mfc(struct mfcctl2 *mfccp)
|
|||||||
/*
|
/*
|
||||||
* free the bw_meter entries
|
* free the bw_meter entries
|
||||||
*/
|
*/
|
||||||
free_bw_list(rt->mfc_bw_meter);
|
free_bw_list(rt->mfc_bw_meter_leq);
|
||||||
rt->mfc_bw_meter = NULL;
|
rt->mfc_bw_meter_leq = NULL;
|
||||||
|
free_bw_list(rt->mfc_bw_meter_geq);
|
||||||
|
rt->mfc_bw_meter_geq = NULL;
|
||||||
|
|
||||||
LIST_REMOVE(rt, mfc_hash);
|
LIST_REMOVE(rt, mfc_hash);
|
||||||
free(rt, M_MRTABLE);
|
free(rt, M_MRTABLE);
|
||||||
@ -1393,7 +1383,8 @@ X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m,
|
|||||||
|
|
||||||
/* clear the RP address */
|
/* clear the RP address */
|
||||||
rt->mfc_rp.s_addr = INADDR_ANY;
|
rt->mfc_rp.s_addr = INADDR_ANY;
|
||||||
rt->mfc_bw_meter = NULL;
|
rt->mfc_bw_meter_leq = NULL;
|
||||||
|
rt->mfc_bw_meter_geq = NULL;
|
||||||
|
|
||||||
/* initialize pkt counters per src-grp */
|
/* initialize pkt counters per src-grp */
|
||||||
rt->mfc_pkt_cnt = 0;
|
rt->mfc_pkt_cnt = 0;
|
||||||
@ -1459,16 +1450,6 @@ expire_upcalls(void *arg)
|
|||||||
if (rt->mfc_expire == 0 || --rt->mfc_expire > 0)
|
if (rt->mfc_expire == 0 || --rt->mfc_expire > 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/*
|
|
||||||
* free the bw_meter entries
|
|
||||||
*/
|
|
||||||
while (rt->mfc_bw_meter != NULL) {
|
|
||||||
struct bw_meter *x = rt->mfc_bw_meter;
|
|
||||||
|
|
||||||
rt->mfc_bw_meter = x->bm_mfc_next;
|
|
||||||
free(x, M_BWMETER);
|
|
||||||
}
|
|
||||||
|
|
||||||
MRTSTAT_INC(mrts_cache_cleanups);
|
MRTSTAT_INC(mrts_cache_cleanups);
|
||||||
CTR3(KTR_IPMF, "%s: expire (%lx, %lx)", __func__,
|
CTR3(KTR_IPMF, "%s: expire (%lx, %lx)", __func__,
|
||||||
(u_long)ntohl(rt->mfc_origin.s_addr),
|
(u_long)ntohl(rt->mfc_origin.s_addr),
|
||||||
@ -1602,14 +1583,22 @@ ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif)
|
|||||||
/*
|
/*
|
||||||
* Perform upcall-related bw measuring.
|
* Perform upcall-related bw measuring.
|
||||||
*/
|
*/
|
||||||
if (rt->mfc_bw_meter != NULL) {
|
if ((rt->mfc_bw_meter_geq != NULL) || (rt->mfc_bw_meter_leq != NULL)) {
|
||||||
struct bw_meter *x;
|
struct bw_meter *x;
|
||||||
struct timeval now;
|
struct timeval now;
|
||||||
|
|
||||||
microtime(&now);
|
microtime(&now);
|
||||||
MFC_LOCK_ASSERT();
|
MFC_LOCK_ASSERT();
|
||||||
for (x = rt->mfc_bw_meter; x != NULL; x = x->bm_mfc_next)
|
/* Process meters for Greater-or-EQual case */
|
||||||
bw_meter_receive_packet(x, plen, &now);
|
for (x = rt->mfc_bw_meter_geq; x != NULL; x = x->bm_mfc_next)
|
||||||
|
bw_meter_geq_receive_packet(x, plen, &now);
|
||||||
|
|
||||||
|
/* Process meters for Lower-or-EQual case */
|
||||||
|
for (x = rt->mfc_bw_meter_leq; x != NULL; x = x->bm_mfc_next) {
|
||||||
|
/* Record that a packet is received */
|
||||||
|
x->bm_measured.b_packets++;
|
||||||
|
x->bm_measured.b_bytes += plen;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -1759,6 +1748,44 @@ compute_bw_meter_flags(struct bw_upcall *req)
|
|||||||
return flags;
|
return flags;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
expire_bw_meter_leq(void *arg)
|
||||||
|
{
|
||||||
|
struct bw_meter *x = arg;
|
||||||
|
struct timeval now;
|
||||||
|
/*
|
||||||
|
* INFO:
|
||||||
|
* callout is always executed with MFC_LOCK taken
|
||||||
|
*/
|
||||||
|
|
||||||
|
CURVNET_SET((struct vnet *)x->arg);
|
||||||
|
|
||||||
|
microtime(&now);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test if we should deliver an upcall
|
||||||
|
*/
|
||||||
|
if (((x->bm_flags & BW_METER_UNIT_PACKETS) &&
|
||||||
|
(x->bm_measured.b_packets <= x->bm_threshold.b_packets)) ||
|
||||||
|
((x->bm_flags & BW_METER_UNIT_BYTES) &&
|
||||||
|
(x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) {
|
||||||
|
/* Prepare an upcall for delivery */
|
||||||
|
bw_meter_prepare_upcall(x, &now);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Send all upcalls that are pending delivery */
|
||||||
|
bw_upcalls_send();
|
||||||
|
|
||||||
|
/* Reset counters */
|
||||||
|
x->bm_start_time = now;
|
||||||
|
x->bm_measured.b_bytes = 0;
|
||||||
|
x->bm_measured.b_packets = 0;
|
||||||
|
|
||||||
|
callout_schedule(&x->bm_meter_callout, tvtohz(&x->bm_threshold.b_time));
|
||||||
|
|
||||||
|
CURVNET_RESTORE();
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Add a bw_meter entry
|
* Add a bw_meter entry
|
||||||
*/
|
*/
|
||||||
@ -1769,7 +1796,7 @@ add_bw_upcall(struct bw_upcall *req)
|
|||||||
struct timeval delta = { BW_UPCALL_THRESHOLD_INTERVAL_MIN_SEC,
|
struct timeval delta = { BW_UPCALL_THRESHOLD_INTERVAL_MIN_SEC,
|
||||||
BW_UPCALL_THRESHOLD_INTERVAL_MIN_USEC };
|
BW_UPCALL_THRESHOLD_INTERVAL_MIN_USEC };
|
||||||
struct timeval now;
|
struct timeval now;
|
||||||
struct bw_meter *x;
|
struct bw_meter *x, **bwm_ptr;
|
||||||
uint32_t flags;
|
uint32_t flags;
|
||||||
|
|
||||||
if (!(V_mrt_api_config & MRT_MFC_BW_UPCALL))
|
if (!(V_mrt_api_config & MRT_MFC_BW_UPCALL))
|
||||||
@ -1799,19 +1826,29 @@ add_bw_upcall(struct bw_upcall *req)
|
|||||||
MFC_UNLOCK();
|
MFC_UNLOCK();
|
||||||
return EADDRNOTAVAIL;
|
return EADDRNOTAVAIL;
|
||||||
}
|
}
|
||||||
for (x = mfc->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) {
|
|
||||||
|
/* Choose an appropriate bw_meter list */
|
||||||
|
if (req->bu_flags & BW_UPCALL_GEQ)
|
||||||
|
bwm_ptr = &mfc->mfc_bw_meter_geq;
|
||||||
|
else
|
||||||
|
bwm_ptr = &mfc->mfc_bw_meter_leq;
|
||||||
|
|
||||||
|
for (x = *bwm_ptr; x != NULL; x = x->bm_mfc_next) {
|
||||||
if ((BW_TIMEVALCMP(&x->bm_threshold.b_time,
|
if ((BW_TIMEVALCMP(&x->bm_threshold.b_time,
|
||||||
&req->bu_threshold.b_time, ==)) &&
|
&req->bu_threshold.b_time, ==))
|
||||||
(x->bm_threshold.b_packets == req->bu_threshold.b_packets) &&
|
&& (x->bm_threshold.b_packets
|
||||||
(x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) &&
|
== req->bu_threshold.b_packets)
|
||||||
(x->bm_flags & BW_METER_USER_FLAGS) == flags) {
|
&& (x->bm_threshold.b_bytes
|
||||||
|
== req->bu_threshold.b_bytes)
|
||||||
|
&& (x->bm_flags & BW_METER_USER_FLAGS)
|
||||||
|
== flags) {
|
||||||
MFC_UNLOCK();
|
MFC_UNLOCK();
|
||||||
return 0; /* XXX Already installed */
|
return 0; /* XXX Already installed */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Allocate the new bw_meter entry */
|
/* Allocate the new bw_meter entry */
|
||||||
x = (struct bw_meter *)malloc(sizeof(*x), M_BWMETER, M_NOWAIT);
|
x = (struct bw_meter*) malloc(sizeof(*x), M_BWMETER, M_NOWAIT);
|
||||||
if (x == NULL) {
|
if (x == NULL) {
|
||||||
MFC_UNLOCK();
|
MFC_UNLOCK();
|
||||||
return ENOBUFS;
|
return ENOBUFS;
|
||||||
@ -1827,13 +1864,20 @@ add_bw_upcall(struct bw_upcall *req)
|
|||||||
x->bm_measured.b_bytes = 0;
|
x->bm_measured.b_bytes = 0;
|
||||||
x->bm_flags = flags;
|
x->bm_flags = flags;
|
||||||
x->bm_time_next = NULL;
|
x->bm_time_next = NULL;
|
||||||
x->bm_time_hash = BW_METER_BUCKETS;
|
x->bm_mfc = mfc;
|
||||||
|
x->arg = curvnet;
|
||||||
|
|
||||||
|
/* For LEQ case create periodic callout */
|
||||||
|
if (req->bu_flags & BW_UPCALL_LEQ) {
|
||||||
|
callout_init_mtx(&x->bm_meter_callout, &mfc_mtx,0);
|
||||||
|
callout_reset(&x->bm_meter_callout, tvtohz(&x->bm_threshold.b_time),
|
||||||
|
expire_bw_meter_leq, x);
|
||||||
|
}
|
||||||
|
|
||||||
/* Add the new bw_meter entry to the front of entries for this MFC */
|
/* Add the new bw_meter entry to the front of entries for this MFC */
|
||||||
x->bm_mfc = mfc;
|
x->bm_mfc_next = *bwm_ptr;
|
||||||
x->bm_mfc_next = mfc->mfc_bw_meter;
|
*bwm_ptr = x;
|
||||||
mfc->mfc_bw_meter = x;
|
|
||||||
schedule_bw_meter(x, &now);
|
|
||||||
MFC_UNLOCK();
|
MFC_UNLOCK();
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -1845,8 +1889,11 @@ free_bw_list(struct bw_meter *list)
|
|||||||
while (list != NULL) {
|
while (list != NULL) {
|
||||||
struct bw_meter *x = list;
|
struct bw_meter *x = list;
|
||||||
|
|
||||||
|
/* MFC_LOCK must be held here */
|
||||||
|
if (x->bm_flags & BW_METER_LEQ)
|
||||||
|
callout_drain(&x->bm_meter_callout);
|
||||||
|
|
||||||
list = list->bm_mfc_next;
|
list = list->bm_mfc_next;
|
||||||
unschedule_bw_meter(x);
|
|
||||||
free(x, M_BWMETER);
|
free(x, M_BWMETER);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1858,7 +1905,7 @@ static int
|
|||||||
del_bw_upcall(struct bw_upcall *req)
|
del_bw_upcall(struct bw_upcall *req)
|
||||||
{
|
{
|
||||||
struct mfc *mfc;
|
struct mfc *mfc;
|
||||||
struct bw_meter *x;
|
struct bw_meter *x, **bwm_ptr;
|
||||||
|
|
||||||
if (!(V_mrt_api_config & MRT_MFC_BW_UPCALL))
|
if (!(V_mrt_api_config & MRT_MFC_BW_UPCALL))
|
||||||
return EOPNOTSUPP;
|
return EOPNOTSUPP;
|
||||||
@ -1876,8 +1923,14 @@ del_bw_upcall(struct bw_upcall *req)
|
|||||||
*/
|
*/
|
||||||
struct bw_meter *list;
|
struct bw_meter *list;
|
||||||
|
|
||||||
list = mfc->mfc_bw_meter;
|
/* Free LEQ list */
|
||||||
mfc->mfc_bw_meter = NULL;
|
list = mfc->mfc_bw_meter_leq;
|
||||||
|
mfc->mfc_bw_meter_leq = NULL;
|
||||||
|
free_bw_list(list);
|
||||||
|
|
||||||
|
/* Free GEQ list */
|
||||||
|
list = mfc->mfc_bw_meter_geq;
|
||||||
|
mfc->mfc_bw_meter_geq = NULL;
|
||||||
free_bw_list(list);
|
free_bw_list(list);
|
||||||
MFC_UNLOCK();
|
MFC_UNLOCK();
|
||||||
return 0;
|
return 0;
|
||||||
@ -1887,8 +1940,14 @@ del_bw_upcall(struct bw_upcall *req)
|
|||||||
|
|
||||||
flags = compute_bw_meter_flags(req);
|
flags = compute_bw_meter_flags(req);
|
||||||
|
|
||||||
|
/* Choose an appropriate bw_meter list */
|
||||||
|
if (req->bu_flags & BW_UPCALL_GEQ)
|
||||||
|
bwm_ptr = &mfc->mfc_bw_meter_geq;
|
||||||
|
else
|
||||||
|
bwm_ptr = &mfc->mfc_bw_meter_leq;
|
||||||
|
|
||||||
/* Find the bw_meter entry to delete */
|
/* Find the bw_meter entry to delete */
|
||||||
for (prev = NULL, x = mfc->mfc_bw_meter; x != NULL;
|
for (prev = NULL, x = *bwm_ptr; x != NULL;
|
||||||
prev = x, x = x->bm_mfc_next) {
|
prev = x, x = x->bm_mfc_next) {
|
||||||
if ((BW_TIMEVALCMP(&x->bm_threshold.b_time,
|
if ((BW_TIMEVALCMP(&x->bm_threshold.b_time,
|
||||||
&req->bu_threshold.b_time, ==)) &&
|
&req->bu_threshold.b_time, ==)) &&
|
||||||
@ -1901,9 +1960,11 @@ del_bw_upcall(struct bw_upcall *req)
|
|||||||
if (prev != NULL)
|
if (prev != NULL)
|
||||||
prev->bm_mfc_next = x->bm_mfc_next; /* remove from middle*/
|
prev->bm_mfc_next = x->bm_mfc_next; /* remove from middle*/
|
||||||
else
|
else
|
||||||
x->bm_mfc->mfc_bw_meter = x->bm_mfc_next;/* new head of list */
|
*bwm_ptr = x->bm_mfc_next;/* new head of list */
|
||||||
|
|
||||||
|
if (req->bu_flags & BW_UPCALL_LEQ)
|
||||||
|
callout_stop(&x->bm_meter_callout);
|
||||||
|
|
||||||
unschedule_bw_meter(x);
|
|
||||||
MFC_UNLOCK();
|
MFC_UNLOCK();
|
||||||
/* Free the bw_meter entry */
|
/* Free the bw_meter entry */
|
||||||
free(x, M_BWMETER);
|
free(x, M_BWMETER);
|
||||||
@ -1920,7 +1981,7 @@ del_bw_upcall(struct bw_upcall *req)
|
|||||||
* Perform bandwidth measurement processing that may result in an upcall
|
* Perform bandwidth measurement processing that may result in an upcall
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
bw_meter_receive_packet(struct bw_meter *x, int plen, struct timeval *nowp)
|
bw_meter_geq_receive_packet(struct bw_meter *x, int plen, struct timeval *nowp)
|
||||||
{
|
{
|
||||||
struct timeval delta;
|
struct timeval delta;
|
||||||
|
|
||||||
@ -1929,7 +1990,6 @@ bw_meter_receive_packet(struct bw_meter *x, int plen, struct timeval *nowp)
|
|||||||
delta = *nowp;
|
delta = *nowp;
|
||||||
BW_TIMEVALDECR(&delta, &x->bm_start_time);
|
BW_TIMEVALDECR(&delta, &x->bm_start_time);
|
||||||
|
|
||||||
if (x->bm_flags & BW_METER_GEQ) {
|
|
||||||
/*
|
/*
|
||||||
* Processing for ">=" type of bw_meter entry
|
* Processing for ">=" type of bw_meter entry
|
||||||
*/
|
*/
|
||||||
@ -1958,54 +2018,6 @@ bw_meter_receive_packet(struct bw_meter *x, int plen, struct timeval *nowp)
|
|||||||
x->bm_flags |= BW_METER_UPCALL_DELIVERED;
|
x->bm_flags |= BW_METER_UPCALL_DELIVERED;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (x->bm_flags & BW_METER_LEQ) {
|
|
||||||
/*
|
|
||||||
* Processing for "<=" type of bw_meter entry
|
|
||||||
*/
|
|
||||||
if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) {
|
|
||||||
/*
|
|
||||||
* We are behind time with the multicast forwarding table
|
|
||||||
* scanning for "<=" type of bw_meter entries, so test now
|
|
||||||
* if we should deliver an upcall.
|
|
||||||
*/
|
|
||||||
if (((x->bm_flags & BW_METER_UNIT_PACKETS) &&
|
|
||||||
(x->bm_measured.b_packets <= x->bm_threshold.b_packets)) ||
|
|
||||||
((x->bm_flags & BW_METER_UNIT_BYTES) &&
|
|
||||||
(x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) {
|
|
||||||
/* Prepare an upcall for delivery */
|
|
||||||
bw_meter_prepare_upcall(x, nowp);
|
|
||||||
}
|
|
||||||
/* Reschedule the bw_meter entry */
|
|
||||||
unschedule_bw_meter(x);
|
|
||||||
schedule_bw_meter(x, nowp);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Record that a packet is received */
|
|
||||||
x->bm_measured.b_packets++;
|
|
||||||
x->bm_measured.b_bytes += plen;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Test if we should restart the measuring interval
|
|
||||||
*/
|
|
||||||
if ((x->bm_flags & BW_METER_UNIT_PACKETS &&
|
|
||||||
x->bm_measured.b_packets <= x->bm_threshold.b_packets) ||
|
|
||||||
(x->bm_flags & BW_METER_UNIT_BYTES &&
|
|
||||||
x->bm_measured.b_bytes <= x->bm_threshold.b_bytes)) {
|
|
||||||
/* Don't restart the measuring interval */
|
|
||||||
} else {
|
|
||||||
/* Do restart the measuring interval */
|
|
||||||
/*
|
|
||||||
* XXX: note that we don't unschedule and schedule, because this
|
|
||||||
* might be too much overhead per packet. Instead, when we process
|
|
||||||
* all entries for a given timer hash bin, we check whether it is
|
|
||||||
* really a timeout. If not, we reschedule at that time.
|
|
||||||
*/
|
|
||||||
x->bm_start_time = *nowp;
|
|
||||||
x->bm_measured.b_packets = 0;
|
|
||||||
x->bm_measured.b_bytes = 0;
|
|
||||||
x->bm_flags &= ~BW_METER_UPCALL_DELIVERED;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -2103,183 +2115,6 @@ bw_upcalls_send(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Compute the timeout hash value for the bw_meter entries
|
|
||||||
*/
|
|
||||||
#define BW_METER_TIMEHASH(bw_meter, hash) \
|
|
||||||
do { \
|
|
||||||
struct timeval next_timeval = (bw_meter)->bm_start_time; \
|
|
||||||
\
|
|
||||||
BW_TIMEVALADD(&next_timeval, &(bw_meter)->bm_threshold.b_time); \
|
|
||||||
(hash) = next_timeval.tv_sec; \
|
|
||||||
if (next_timeval.tv_usec) \
|
|
||||||
(hash)++; /* XXX: make sure we don't timeout early */ \
|
|
||||||
(hash) %= BW_METER_BUCKETS; \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Schedule a timer to process periodically bw_meter entry of type "<="
|
|
||||||
* by linking the entry in the proper hash bucket.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
schedule_bw_meter(struct bw_meter *x, struct timeval *nowp)
|
|
||||||
{
|
|
||||||
int time_hash;
|
|
||||||
|
|
||||||
MFC_LOCK_ASSERT();
|
|
||||||
|
|
||||||
if (!(x->bm_flags & BW_METER_LEQ))
|
|
||||||
return; /* XXX: we schedule timers only for "<=" entries */
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Reset the bw_meter entry
|
|
||||||
*/
|
|
||||||
x->bm_start_time = *nowp;
|
|
||||||
x->bm_measured.b_packets = 0;
|
|
||||||
x->bm_measured.b_bytes = 0;
|
|
||||||
x->bm_flags &= ~BW_METER_UPCALL_DELIVERED;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Compute the timeout hash value and insert the entry
|
|
||||||
*/
|
|
||||||
BW_METER_TIMEHASH(x, time_hash);
|
|
||||||
x->bm_time_next = V_bw_meter_timers[time_hash];
|
|
||||||
V_bw_meter_timers[time_hash] = x;
|
|
||||||
x->bm_time_hash = time_hash;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Unschedule the periodic timer that processes bw_meter entry of type "<="
|
|
||||||
* by removing the entry from the proper hash bucket.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
unschedule_bw_meter(struct bw_meter *x)
|
|
||||||
{
|
|
||||||
int time_hash;
|
|
||||||
struct bw_meter *prev, *tmp;
|
|
||||||
|
|
||||||
MFC_LOCK_ASSERT();
|
|
||||||
|
|
||||||
if (!(x->bm_flags & BW_METER_LEQ))
|
|
||||||
return; /* XXX: we schedule timers only for "<=" entries */
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Compute the timeout hash value and delete the entry
|
|
||||||
*/
|
|
||||||
time_hash = x->bm_time_hash;
|
|
||||||
if (time_hash >= BW_METER_BUCKETS)
|
|
||||||
return; /* Entry was not scheduled */
|
|
||||||
|
|
||||||
for (prev = NULL, tmp = V_bw_meter_timers[time_hash];
|
|
||||||
tmp != NULL; prev = tmp, tmp = tmp->bm_time_next)
|
|
||||||
if (tmp == x)
|
|
||||||
break;
|
|
||||||
|
|
||||||
if (tmp == NULL)
|
|
||||||
panic("unschedule_bw_meter: bw_meter entry not found");
|
|
||||||
|
|
||||||
if (prev != NULL)
|
|
||||||
prev->bm_time_next = x->bm_time_next;
|
|
||||||
else
|
|
||||||
V_bw_meter_timers[time_hash] = x->bm_time_next;
|
|
||||||
|
|
||||||
x->bm_time_next = NULL;
|
|
||||||
x->bm_time_hash = BW_METER_BUCKETS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Process all "<=" type of bw_meter that should be processed now,
|
|
||||||
* and for each entry prepare an upcall if necessary. Each processed
|
|
||||||
* entry is rescheduled again for the (periodic) processing.
|
|
||||||
*
|
|
||||||
* This is run periodically (once per second normally). On each round,
|
|
||||||
* all the potentially matching entries are in the hash slot that we are
|
|
||||||
* looking at.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
bw_meter_process()
|
|
||||||
{
|
|
||||||
uint32_t loops;
|
|
||||||
int i;
|
|
||||||
struct timeval now, process_endtime;
|
|
||||||
|
|
||||||
microtime(&now);
|
|
||||||
if (V_last_tv_sec == now.tv_sec)
|
|
||||||
return; /* nothing to do */
|
|
||||||
|
|
||||||
loops = now.tv_sec - V_last_tv_sec;
|
|
||||||
V_last_tv_sec = now.tv_sec;
|
|
||||||
if (loops > BW_METER_BUCKETS)
|
|
||||||
loops = BW_METER_BUCKETS;
|
|
||||||
|
|
||||||
MFC_LOCK();
|
|
||||||
/*
|
|
||||||
* Process all bins of bw_meter entries from the one after the last
|
|
||||||
* processed to the current one. On entry, i points to the last bucket
|
|
||||||
* visited, so we need to increment i at the beginning of the loop.
|
|
||||||
*/
|
|
||||||
for (i = (now.tv_sec - loops) % BW_METER_BUCKETS; loops > 0; loops--) {
|
|
||||||
struct bw_meter *x, *tmp_list;
|
|
||||||
|
|
||||||
if (++i >= BW_METER_BUCKETS)
|
|
||||||
i = 0;
|
|
||||||
|
|
||||||
/* Disconnect the list of bw_meter entries from the bin */
|
|
||||||
tmp_list = V_bw_meter_timers[i];
|
|
||||||
V_bw_meter_timers[i] = NULL;
|
|
||||||
|
|
||||||
/* Process the list of bw_meter entries */
|
|
||||||
while (tmp_list != NULL) {
|
|
||||||
x = tmp_list;
|
|
||||||
tmp_list = tmp_list->bm_time_next;
|
|
||||||
|
|
||||||
/* Test if the time interval is over */
|
|
||||||
process_endtime = x->bm_start_time;
|
|
||||||
BW_TIMEVALADD(&process_endtime, &x->bm_threshold.b_time);
|
|
||||||
if (BW_TIMEVALCMP(&process_endtime, &now, >)) {
|
|
||||||
/* Not yet: reschedule, but don't reset */
|
|
||||||
int time_hash;
|
|
||||||
|
|
||||||
BW_METER_TIMEHASH(x, time_hash);
|
|
||||||
if (time_hash == i && process_endtime.tv_sec == now.tv_sec) {
|
|
||||||
/*
|
|
||||||
* XXX: somehow the bin processing is a bit ahead of time.
|
|
||||||
* Put the entry in the next bin.
|
|
||||||
*/
|
|
||||||
if (++time_hash >= BW_METER_BUCKETS)
|
|
||||||
time_hash = 0;
|
|
||||||
}
|
|
||||||
x->bm_time_next = V_bw_meter_timers[time_hash];
|
|
||||||
V_bw_meter_timers[time_hash] = x;
|
|
||||||
x->bm_time_hash = time_hash;
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Test if we should deliver an upcall
|
|
||||||
*/
|
|
||||||
if (((x->bm_flags & BW_METER_UNIT_PACKETS) &&
|
|
||||||
(x->bm_measured.b_packets <= x->bm_threshold.b_packets)) ||
|
|
||||||
((x->bm_flags & BW_METER_UNIT_BYTES) &&
|
|
||||||
(x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) {
|
|
||||||
/* Prepare an upcall for delivery */
|
|
||||||
bw_meter_prepare_upcall(x, &now);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Reschedule for next processing
|
|
||||||
*/
|
|
||||||
schedule_bw_meter(x, &now);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Send all upcalls that are pending delivery */
|
|
||||||
bw_upcalls_send();
|
|
||||||
|
|
||||||
MFC_UNLOCK();
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* A periodic function for sending all upcalls that are pending delivery
|
* A periodic function for sending all upcalls that are pending delivery
|
||||||
*/
|
*/
|
||||||
@ -2297,23 +2132,6 @@ expire_bw_upcalls_send(void *arg)
|
|||||||
CURVNET_RESTORE();
|
CURVNET_RESTORE();
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* A periodic function for periodic scanning of the multicast forwarding
|
|
||||||
* table for processing all "<=" bw_meter entries.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
expire_bw_meter_process(void *arg)
|
|
||||||
{
|
|
||||||
CURVNET_SET((struct vnet *) arg);
|
|
||||||
|
|
||||||
if (V_mrt_api_config & MRT_MFC_BW_UPCALL)
|
|
||||||
bw_meter_process();
|
|
||||||
|
|
||||||
callout_reset(&V_bw_meter_ch, BW_METER_PERIOD, expire_bw_meter_process,
|
|
||||||
curvnet);
|
|
||||||
CURVNET_RESTORE();
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* End of bandwidth monitoring code
|
* End of bandwidth monitoring code
|
||||||
*/
|
*/
|
||||||
@ -2835,14 +2653,11 @@ vnet_mroute_init(const void *unused __unused)
|
|||||||
|
|
||||||
V_viftable = mallocarray(MAXVIFS, sizeof(*V_viftable),
|
V_viftable = mallocarray(MAXVIFS, sizeof(*V_viftable),
|
||||||
M_MRTABLE, M_WAITOK|M_ZERO);
|
M_MRTABLE, M_WAITOK|M_ZERO);
|
||||||
V_bw_meter_timers = mallocarray(BW_METER_BUCKETS,
|
|
||||||
sizeof(*V_bw_meter_timers), M_MRTABLE, M_WAITOK|M_ZERO);
|
|
||||||
V_bw_upcalls = mallocarray(BW_UPCALLS_MAX, sizeof(*V_bw_upcalls),
|
V_bw_upcalls = mallocarray(BW_UPCALLS_MAX, sizeof(*V_bw_upcalls),
|
||||||
M_MRTABLE, M_WAITOK|M_ZERO);
|
M_MRTABLE, M_WAITOK|M_ZERO);
|
||||||
|
|
||||||
callout_init(&V_expire_upcalls_ch, 1);
|
callout_init(&V_expire_upcalls_ch, 1);
|
||||||
callout_init(&V_bw_upcalls_ch, 1);
|
callout_init(&V_bw_upcalls_ch, 1);
|
||||||
callout_init(&V_bw_meter_ch, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
VNET_SYSINIT(vnet_mroute_init, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mroute_init,
|
VNET_SYSINIT(vnet_mroute_init, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mroute_init,
|
||||||
@ -2853,7 +2668,6 @@ vnet_mroute_uninit(const void *unused __unused)
|
|||||||
{
|
{
|
||||||
|
|
||||||
free(V_bw_upcalls, M_MRTABLE);
|
free(V_bw_upcalls, M_MRTABLE);
|
||||||
free(V_bw_meter_timers, M_MRTABLE);
|
|
||||||
free(V_viftable, M_MRTABLE);
|
free(V_viftable, M_MRTABLE);
|
||||||
free(V_nexpire, M_MRTABLE);
|
free(V_nexpire, M_MRTABLE);
|
||||||
V_nexpire = NULL;
|
V_nexpire = NULL;
|
||||||
|
@ -266,7 +266,7 @@ struct vif {
|
|||||||
u_long v_bytes_out; /* # bytes out on interface */
|
u_long v_bytes_out; /* # bytes out on interface */
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef _KERNEL
|
#if defined(_KERNEL) || defined (_NETSTAT)
|
||||||
/*
|
/*
|
||||||
* The kernel's multicast forwarding cache entry structure
|
* The kernel's multicast forwarding cache entry structure
|
||||||
*/
|
*/
|
||||||
@ -283,7 +283,10 @@ struct mfc {
|
|||||||
struct timeval mfc_last_assert; /* last time I sent an assert*/
|
struct timeval mfc_last_assert; /* last time I sent an assert*/
|
||||||
uint8_t mfc_flags[MAXVIFS]; /* the MRT_MFC_FLAGS_* flags */
|
uint8_t mfc_flags[MAXVIFS]; /* the MRT_MFC_FLAGS_* flags */
|
||||||
struct in_addr mfc_rp; /* the RP address */
|
struct in_addr mfc_rp; /* the RP address */
|
||||||
struct bw_meter *mfc_bw_meter; /* list of bandwidth meters */
|
struct bw_meter *mfc_bw_meter_leq; /* list of bandwidth meters
|
||||||
|
for Lower-or-EQual case */
|
||||||
|
struct bw_meter *mfc_bw_meter_geq; /* list of bandwidth meters
|
||||||
|
for Greater-or-EQual case */
|
||||||
u_long mfc_nstall; /* # of packets awaiting mfc */
|
u_long mfc_nstall; /* # of packets awaiting mfc */
|
||||||
TAILQ_HEAD(, rtdetq) mfc_stall; /* q of packets awaiting mfc */
|
TAILQ_HEAD(, rtdetq) mfc_stall; /* q of packets awaiting mfc */
|
||||||
};
|
};
|
||||||
@ -327,7 +330,6 @@ struct rtdetq {
|
|||||||
struct bw_meter {
|
struct bw_meter {
|
||||||
struct bw_meter *bm_mfc_next; /* next bw meter (same mfc) */
|
struct bw_meter *bm_mfc_next; /* next bw meter (same mfc) */
|
||||||
struct bw_meter *bm_time_next; /* next bw meter (same time) */
|
struct bw_meter *bm_time_next; /* next bw meter (same time) */
|
||||||
uint32_t bm_time_hash; /* the time hash value */
|
|
||||||
struct mfc *bm_mfc; /* the corresponding mfc */
|
struct mfc *bm_mfc; /* the corresponding mfc */
|
||||||
uint32_t bm_flags; /* misc flags (see below) */
|
uint32_t bm_flags; /* misc flags (see below) */
|
||||||
#define BW_METER_UNIT_PACKETS (1 << 0) /* threshold (in packets) */
|
#define BW_METER_UNIT_PACKETS (1 << 0) /* threshold (in packets) */
|
||||||
@ -344,6 +346,10 @@ struct bw_meter {
|
|||||||
struct bw_data bm_threshold; /* the upcall threshold */
|
struct bw_data bm_threshold; /* the upcall threshold */
|
||||||
struct bw_data bm_measured; /* the measured bw */
|
struct bw_data bm_measured; /* the measured bw */
|
||||||
struct timeval bm_start_time; /* abs. time */
|
struct timeval bm_start_time; /* abs. time */
|
||||||
|
#ifdef _KERNEL
|
||||||
|
struct callout bm_meter_callout; /* Periodic callout */
|
||||||
|
void* arg; /* custom argument */
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef _KERNEL
|
#ifdef _KERNEL
|
||||||
|
@ -62,9 +62,9 @@ __FBSDID("$FreeBSD$");
|
|||||||
#include <netinet/igmp.h>
|
#include <netinet/igmp.h>
|
||||||
#include <net/route.h>
|
#include <net/route.h>
|
||||||
|
|
||||||
#define _KERNEL 1
|
#define _NETSTAT 1
|
||||||
#include <netinet/ip_mroute.h>
|
#include <netinet/ip_mroute.h>
|
||||||
#undef _KERNEL
|
#undef _NETSTAT_
|
||||||
|
|
||||||
#include <err.h>
|
#include <err.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
@ -213,7 +213,16 @@ print_mfc(struct mfc *m, int maxvif, int *banner_printed)
|
|||||||
* XXX We break the rules and try to use KVM to read the
|
* XXX We break the rules and try to use KVM to read the
|
||||||
* bandwidth meters, they are not retrievable via sysctl yet.
|
* bandwidth meters, they are not retrievable via sysctl yet.
|
||||||
*/
|
*/
|
||||||
bwm = m->mfc_bw_meter;
|
bwm = m->mfc_bw_meter_leq;
|
||||||
|
while (bwm != NULL) {
|
||||||
|
error = kread((u_long)bwm, (char *)&bw_meter,
|
||||||
|
sizeof(bw_meter));
|
||||||
|
if (error)
|
||||||
|
break;
|
||||||
|
print_bw_meter(&bw_meter, &bw_banner_printed);
|
||||||
|
bwm = bw_meter.bm_mfc_next;
|
||||||
|
}
|
||||||
|
bwm = m->mfc_bw_meter_geq;
|
||||||
while (bwm != NULL) {
|
while (bwm != NULL) {
|
||||||
error = kread((u_long)bwm, (char *)&bw_meter,
|
error = kread((u_long)bwm, (char *)&bw_meter,
|
||||||
sizeof(bw_meter));
|
sizeof(bw_meter));
|
||||||
|
Loading…
Reference in New Issue
Block a user