sched: remove WRR from strict priority TC queues
All higher priority traffic classes contain only one queue, thus remove wrr function for them. The lowest priority best-effort traffic class conitnue to have multiple queues and packet are scheduled from its queues using wrr function. Signed-off-by: Jasvinder Singh <jasvinder.singh@intel.com> Signed-off-by: Abraham Tovar <abrahamx.tovar@intel.com> Signed-off-by: Lukasz Krakowiak <lukaszx.krakowiak@intel.com>
This commit is contained in:
parent
a3549d2716
commit
e16b06da09
@ -40,7 +40,7 @@ static struct rte_sched_pipe_params pipe_profile[] = {
|
||||
.tc_rate = {305175, 305175, 305175, 305175},
|
||||
.tc_period = 40,
|
||||
|
||||
.wrr_weights = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
|
||||
.wrr_weights = {1, 1, 1, 1},
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -186,7 +186,7 @@ static struct rte_sched_pipe_params pipe_profiles[RTE_SCHED_PIPE_PROFILES_PER_PO
|
||||
.tc_ov_weight = 1,
|
||||
#endif
|
||||
|
||||
.wrr_weights = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
|
||||
.wrr_weights = {1, 1, 1, 1},
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -18,7 +18,7 @@ LDLIBS += -lrte_timer
|
||||
|
||||
EXPORT_MAP := rte_sched_version.map
|
||||
|
||||
LIBABIVER := 2
|
||||
LIBABIVER := 3
|
||||
|
||||
#
|
||||
# all source are stored in SRCS-y
|
||||
|
@ -1,7 +1,7 @@
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
# Copyright(c) 2017 Intel Corporation
|
||||
|
||||
version = 2
|
||||
version = 3
|
||||
sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
|
||||
headers = files('rte_sched.h', 'rte_sched_common.h',
|
||||
'rte_red.h', 'rte_approx.h')
|
||||
|
@ -37,6 +37,7 @@
|
||||
|
||||
#define RTE_SCHED_TB_RATE_CONFIG_ERR (1e-7)
|
||||
#define RTE_SCHED_WRR_SHIFT 3
|
||||
#define RTE_SCHED_MAX_QUEUES_PER_TC RTE_SCHED_BE_QUEUES_PER_PIPE
|
||||
#define RTE_SCHED_GRINDER_PCACHE_SIZE (64 / RTE_SCHED_QUEUES_PER_PIPE)
|
||||
#define RTE_SCHED_PIPE_INVALID UINT32_MAX
|
||||
#define RTE_SCHED_BMP_POS_INVALID UINT32_MAX
|
||||
@ -84,8 +85,8 @@ struct rte_sched_pipe_profile {
|
||||
uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
|
||||
uint8_t tc_ov_weight;
|
||||
|
||||
/* Pipe queues */
|
||||
uint8_t wrr_cost[RTE_SCHED_QUEUES_PER_PIPE];
|
||||
/* Pipe best-effort traffic class queues */
|
||||
uint8_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
|
||||
};
|
||||
|
||||
struct rte_sched_pipe {
|
||||
@ -101,7 +102,7 @@ struct rte_sched_pipe {
|
||||
uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
|
||||
|
||||
/* Weighted Round Robin (WRR) */
|
||||
uint8_t wrr_tokens[RTE_SCHED_QUEUES_PER_PIPE];
|
||||
uint8_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE];
|
||||
|
||||
/* TC oversubscription */
|
||||
uint32_t tc_ov_credits;
|
||||
@ -153,16 +154,16 @@ struct rte_sched_grinder {
|
||||
uint32_t tc_index;
|
||||
struct rte_sched_queue *queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
|
||||
struct rte_mbuf **qbase[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
|
||||
uint32_t qindex[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
|
||||
uint32_t qindex[RTE_SCHED_MAX_QUEUES_PER_TC];
|
||||
uint16_t qsize;
|
||||
uint32_t qmask;
|
||||
uint32_t qpos;
|
||||
struct rte_mbuf *pkt;
|
||||
|
||||
/* WRR */
|
||||
uint16_t wrr_tokens[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
|
||||
uint16_t wrr_mask[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
|
||||
uint8_t wrr_cost[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
|
||||
uint16_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE];
|
||||
uint16_t wrr_mask[RTE_SCHED_BE_QUEUES_PER_PIPE];
|
||||
uint8_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
|
||||
};
|
||||
|
||||
struct rte_sched_port {
|
||||
@ -483,7 +484,7 @@ rte_sched_port_log_pipe_profile(struct rte_sched_port *port, uint32_t i)
|
||||
" Token bucket: period = %u, credits per period = %u, size = %u\n"
|
||||
" Traffic classes: period = %u, credits per period = [%u, %u, %u, %u]\n"
|
||||
" Traffic class 3 oversubscription: weight = %hhu\n"
|
||||
" WRR cost: [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu]\n",
|
||||
" WRR cost: [%hhu, %hhu, %hhu, %hhu]\n",
|
||||
i,
|
||||
|
||||
/* Token bucket */
|
||||
@ -502,10 +503,7 @@ rte_sched_port_log_pipe_profile(struct rte_sched_port *port, uint32_t i)
|
||||
p->tc_ov_weight,
|
||||
|
||||
/* WRR */
|
||||
p->wrr_cost[ 0], p->wrr_cost[ 1], p->wrr_cost[ 2], p->wrr_cost[ 3],
|
||||
p->wrr_cost[ 4], p->wrr_cost[ 5], p->wrr_cost[ 6], p->wrr_cost[ 7],
|
||||
p->wrr_cost[ 8], p->wrr_cost[ 9], p->wrr_cost[10], p->wrr_cost[11],
|
||||
p->wrr_cost[12], p->wrr_cost[13], p->wrr_cost[14], p->wrr_cost[15]);
|
||||
p->wrr_cost[0], p->wrr_cost[1], p->wrr_cost[2], p->wrr_cost[3]);
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
@ -523,6 +521,8 @@ rte_sched_pipe_profile_convert(struct rte_sched_pipe_params *src,
|
||||
struct rte_sched_pipe_profile *dst,
|
||||
uint32_t rate)
|
||||
{
|
||||
uint32_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
|
||||
uint32_t lcd1, lcd2, lcd;
|
||||
uint32_t i;
|
||||
|
||||
/* Token Bucket */
|
||||
@ -553,33 +553,25 @@ rte_sched_pipe_profile_convert(struct rte_sched_pipe_params *src,
|
||||
dst->tc_ov_weight = src->tc_ov_weight;
|
||||
#endif
|
||||
|
||||
/* WRR */
|
||||
for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
|
||||
uint32_t wrr_cost[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
|
||||
uint32_t lcd, lcd1, lcd2;
|
||||
uint32_t qindex;
|
||||
/* WRR queues */
|
||||
wrr_cost[0] = src->wrr_weights[0];
|
||||
wrr_cost[1] = src->wrr_weights[1];
|
||||
wrr_cost[2] = src->wrr_weights[2];
|
||||
wrr_cost[3] = src->wrr_weights[3];
|
||||
|
||||
qindex = i * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS;
|
||||
lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]);
|
||||
lcd2 = rte_get_lcd(wrr_cost[2], wrr_cost[3]);
|
||||
lcd = rte_get_lcd(lcd1, lcd2);
|
||||
|
||||
wrr_cost[0] = src->wrr_weights[qindex];
|
||||
wrr_cost[1] = src->wrr_weights[qindex + 1];
|
||||
wrr_cost[2] = src->wrr_weights[qindex + 2];
|
||||
wrr_cost[3] = src->wrr_weights[qindex + 3];
|
||||
wrr_cost[0] = lcd / wrr_cost[0];
|
||||
wrr_cost[1] = lcd / wrr_cost[1];
|
||||
wrr_cost[2] = lcd / wrr_cost[2];
|
||||
wrr_cost[3] = lcd / wrr_cost[3];
|
||||
|
||||
lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]);
|
||||
lcd2 = rte_get_lcd(wrr_cost[2], wrr_cost[3]);
|
||||
lcd = rte_get_lcd(lcd1, lcd2);
|
||||
|
||||
wrr_cost[0] = lcd / wrr_cost[0];
|
||||
wrr_cost[1] = lcd / wrr_cost[1];
|
||||
wrr_cost[2] = lcd / wrr_cost[2];
|
||||
wrr_cost[3] = lcd / wrr_cost[3];
|
||||
|
||||
dst->wrr_cost[qindex] = (uint8_t) wrr_cost[0];
|
||||
dst->wrr_cost[qindex + 1] = (uint8_t) wrr_cost[1];
|
||||
dst->wrr_cost[qindex + 2] = (uint8_t) wrr_cost[2];
|
||||
dst->wrr_cost[qindex + 3] = (uint8_t) wrr_cost[3];
|
||||
}
|
||||
dst->wrr_cost[0] = (uint8_t) wrr_cost[0];
|
||||
dst->wrr_cost[1] = (uint8_t) wrr_cost[1];
|
||||
dst->wrr_cost[2] = (uint8_t) wrr_cost[2];
|
||||
dst->wrr_cost[3] = (uint8_t) wrr_cost[3];
|
||||
}
|
||||
|
||||
static void
|
||||
@ -1715,6 +1707,7 @@ grinder_schedule(struct rte_sched_port *port, uint32_t pos)
|
||||
struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
|
||||
struct rte_mbuf *pkt = grinder->pkt;
|
||||
uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
|
||||
uint32_t be_tc_active;
|
||||
|
||||
if (!grinder_credits_check(port, pos))
|
||||
return 0;
|
||||
@ -1725,13 +1718,18 @@ grinder_schedule(struct rte_sched_port *port, uint32_t pos)
|
||||
/* Send packet */
|
||||
port->pkts_out[port->n_pkts_out++] = pkt;
|
||||
queue->qr++;
|
||||
grinder->wrr_tokens[grinder->qpos] += pkt_len * grinder->wrr_cost[grinder->qpos];
|
||||
|
||||
be_tc_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE) ? ~0x0 : 0x0;
|
||||
grinder->wrr_tokens[grinder->qpos] +=
|
||||
(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
|
||||
|
||||
if (queue->qr == queue->qw) {
|
||||
uint32_t qindex = grinder->qindex[grinder->qpos];
|
||||
|
||||
rte_bitmap_clear(port->bmp, qindex);
|
||||
grinder->qmask &= ~(1 << grinder->qpos);
|
||||
grinder->wrr_mask[grinder->qpos] = 0;
|
||||
if (be_tc_active)
|
||||
grinder->wrr_mask[grinder->qpos] = 0;
|
||||
rte_sched_port_set_queue_empty_timestamp(port, qindex);
|
||||
}
|
||||
|
||||
@ -1962,26 +1960,26 @@ grinder_wrr_load(struct rte_sched_port *port, uint32_t pos)
|
||||
struct rte_sched_grinder *grinder = port->grinder + pos;
|
||||
struct rte_sched_pipe *pipe = grinder->pipe;
|
||||
struct rte_sched_pipe_profile *pipe_params = grinder->pipe_params;
|
||||
uint32_t tc_index = grinder->tc_index;
|
||||
uint32_t qmask = grinder->qmask;
|
||||
uint32_t qindex;
|
||||
|
||||
qindex = tc_index * 4;
|
||||
|
||||
grinder->wrr_tokens[0] = ((uint16_t) pipe->wrr_tokens[qindex]) << RTE_SCHED_WRR_SHIFT;
|
||||
grinder->wrr_tokens[1] = ((uint16_t) pipe->wrr_tokens[qindex + 1]) << RTE_SCHED_WRR_SHIFT;
|
||||
grinder->wrr_tokens[2] = ((uint16_t) pipe->wrr_tokens[qindex + 2]) << RTE_SCHED_WRR_SHIFT;
|
||||
grinder->wrr_tokens[3] = ((uint16_t) pipe->wrr_tokens[qindex + 3]) << RTE_SCHED_WRR_SHIFT;
|
||||
grinder->wrr_tokens[0] =
|
||||
((uint16_t) pipe->wrr_tokens[0]) << RTE_SCHED_WRR_SHIFT;
|
||||
grinder->wrr_tokens[1] =
|
||||
((uint16_t) pipe->wrr_tokens[1]) << RTE_SCHED_WRR_SHIFT;
|
||||
grinder->wrr_tokens[2] =
|
||||
((uint16_t) pipe->wrr_tokens[2]) << RTE_SCHED_WRR_SHIFT;
|
||||
grinder->wrr_tokens[3] =
|
||||
((uint16_t) pipe->wrr_tokens[3]) << RTE_SCHED_WRR_SHIFT;
|
||||
|
||||
grinder->wrr_mask[0] = (qmask & 0x1) * 0xFFFF;
|
||||
grinder->wrr_mask[1] = ((qmask >> 1) & 0x1) * 0xFFFF;
|
||||
grinder->wrr_mask[2] = ((qmask >> 2) & 0x1) * 0xFFFF;
|
||||
grinder->wrr_mask[3] = ((qmask >> 3) & 0x1) * 0xFFFF;
|
||||
|
||||
grinder->wrr_cost[0] = pipe_params->wrr_cost[qindex];
|
||||
grinder->wrr_cost[1] = pipe_params->wrr_cost[qindex + 1];
|
||||
grinder->wrr_cost[2] = pipe_params->wrr_cost[qindex + 2];
|
||||
grinder->wrr_cost[3] = pipe_params->wrr_cost[qindex + 3];
|
||||
grinder->wrr_cost[0] = pipe_params->wrr_cost[0];
|
||||
grinder->wrr_cost[1] = pipe_params->wrr_cost[1];
|
||||
grinder->wrr_cost[2] = pipe_params->wrr_cost[2];
|
||||
grinder->wrr_cost[3] = pipe_params->wrr_cost[3];
|
||||
}
|
||||
|
||||
static inline void
|
||||
@ -1989,19 +1987,19 @@ grinder_wrr_store(struct rte_sched_port *port, uint32_t pos)
|
||||
{
|
||||
struct rte_sched_grinder *grinder = port->grinder + pos;
|
||||
struct rte_sched_pipe *pipe = grinder->pipe;
|
||||
uint32_t tc_index = grinder->tc_index;
|
||||
uint32_t qindex;
|
||||
|
||||
qindex = tc_index * 4;
|
||||
|
||||
pipe->wrr_tokens[qindex] = (grinder->wrr_tokens[0] & grinder->wrr_mask[0])
|
||||
>> RTE_SCHED_WRR_SHIFT;
|
||||
pipe->wrr_tokens[qindex + 1] = (grinder->wrr_tokens[1] & grinder->wrr_mask[1])
|
||||
>> RTE_SCHED_WRR_SHIFT;
|
||||
pipe->wrr_tokens[qindex + 2] = (grinder->wrr_tokens[2] & grinder->wrr_mask[2])
|
||||
>> RTE_SCHED_WRR_SHIFT;
|
||||
pipe->wrr_tokens[qindex + 3] = (grinder->wrr_tokens[3] & grinder->wrr_mask[3])
|
||||
>> RTE_SCHED_WRR_SHIFT;
|
||||
pipe->wrr_tokens[0] =
|
||||
(grinder->wrr_tokens[0] & grinder->wrr_mask[0]) >>
|
||||
RTE_SCHED_WRR_SHIFT;
|
||||
pipe->wrr_tokens[1] =
|
||||
(grinder->wrr_tokens[1] & grinder->wrr_mask[1]) >>
|
||||
RTE_SCHED_WRR_SHIFT;
|
||||
pipe->wrr_tokens[2] =
|
||||
(grinder->wrr_tokens[2] & grinder->wrr_mask[2]) >>
|
||||
RTE_SCHED_WRR_SHIFT;
|
||||
pipe->wrr_tokens[3] =
|
||||
(grinder->wrr_tokens[3] & grinder->wrr_mask[3]) >>
|
||||
RTE_SCHED_WRR_SHIFT;
|
||||
}
|
||||
|
||||
static inline void
|
||||
@ -2040,9 +2038,18 @@ static inline void
|
||||
grinder_prefetch_tc_queue_arrays(struct rte_sched_port *port, uint32_t pos)
|
||||
{
|
||||
struct rte_sched_grinder *grinder = port->grinder + pos;
|
||||
uint16_t qsize, qr[4];
|
||||
uint16_t qsize, qr[RTE_SCHED_MAX_QUEUES_PER_TC];
|
||||
|
||||
qsize = grinder->qsize;
|
||||
grinder->qpos = 0;
|
||||
|
||||
if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) {
|
||||
qr[0] = grinder->queue[0]->qr & (qsize - 1);
|
||||
|
||||
rte_prefetch0(grinder->qbase[0] + qr[0]);
|
||||
return;
|
||||
}
|
||||
|
||||
qr[0] = grinder->queue[0]->qr & (qsize - 1);
|
||||
qr[1] = grinder->queue[1]->qr & (qsize - 1);
|
||||
qr[2] = grinder->queue[2]->qr & (qsize - 1);
|
||||
@ -2118,18 +2125,24 @@ grinder_handle(struct rte_sched_port *port, uint32_t pos)
|
||||
|
||||
case e_GRINDER_READ_MBUF:
|
||||
{
|
||||
uint32_t result = 0;
|
||||
uint32_t wrr_active, result = 0;
|
||||
|
||||
result = grinder_schedule(port, pos);
|
||||
|
||||
wrr_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE);
|
||||
|
||||
/* Look for next packet within the same TC */
|
||||
if (result && grinder->qmask) {
|
||||
grinder_wrr(port, pos);
|
||||
if (wrr_active)
|
||||
grinder_wrr(port, pos);
|
||||
|
||||
grinder_prefetch_mbuf(port, pos);
|
||||
|
||||
return 1;
|
||||
}
|
||||
grinder_wrr_store(port, pos);
|
||||
|
||||
if (wrr_active)
|
||||
grinder_wrr_store(port, pos);
|
||||
|
||||
/* Look for another active TC within same pipe */
|
||||
if (grinder_next_tc(port, pos)) {
|
||||
|
@ -66,18 +66,35 @@ extern "C" {
|
||||
#include "rte_red.h"
|
||||
#endif
|
||||
|
||||
/** Maximum number of queues per pipe.
|
||||
* Note that the multiple queues (power of 2) can only be assigned to
|
||||
* lowest priority (best-effort) traffic class. Other higher priority traffic
|
||||
* classes can only have one queue.
|
||||
* Can not change.
|
||||
*
|
||||
* @see struct rte_sched_port_params
|
||||
*/
|
||||
#define RTE_SCHED_QUEUES_PER_PIPE 16
|
||||
|
||||
/** Number of WRR queues for best-effort traffic class per pipe.
|
||||
*
|
||||
* @see struct rte_sched_pipe_params
|
||||
*/
|
||||
#define RTE_SCHED_BE_QUEUES_PER_PIPE 4
|
||||
|
||||
/** Number of traffic classes per pipe (as well as subport).
|
||||
* Cannot be changed.
|
||||
*/
|
||||
#define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE 4
|
||||
|
||||
/** Best-effort traffic class ID
|
||||
* Can not change.
|
||||
*/
|
||||
#define RTE_SCHED_TRAFFIC_CLASS_BE (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1)
|
||||
|
||||
/** Number of queues per pipe traffic class. Cannot be changed. */
|
||||
#define RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS 4
|
||||
|
||||
/** Number of queues per pipe. */
|
||||
#define RTE_SCHED_QUEUES_PER_PIPE \
|
||||
(RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * \
|
||||
RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS)
|
||||
|
||||
/** Maximum number of pipe profiles that can be defined per port.
|
||||
* Compile-time configurable.
|
||||
@ -165,7 +182,7 @@ struct rte_sched_pipe_params {
|
||||
#endif
|
||||
|
||||
/* Pipe queues */
|
||||
uint8_t wrr_weights[RTE_SCHED_QUEUES_PER_PIPE]; /**< WRR weights */
|
||||
uint8_t wrr_weights[RTE_SCHED_BE_QUEUES_PER_PIPE]; /**< WRR weights */
|
||||
};
|
||||
|
||||
/** Queue statistics */
|
||||
|
Loading…
Reference in New Issue
Block a user