diff --git a/app/test/test_sched.c b/app/test/test_sched.c index 49bb9ea6f5..36fa2d4258 100644 --- a/app/test/test_sched.c +++ b/app/test/test_sched.c @@ -40,7 +40,7 @@ static struct rte_sched_pipe_params pipe_profile[] = { .tc_rate = {305175, 305175, 305175, 305175}, .tc_period = 40, - .wrr_weights = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + .wrr_weights = {1, 1, 1, 1}, }, }; diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c index 1209bd7ce6..6b63d4e0e2 100644 --- a/examples/qos_sched/init.c +++ b/examples/qos_sched/init.c @@ -186,7 +186,7 @@ static struct rte_sched_pipe_params pipe_profiles[RTE_SCHED_PIPE_PROFILES_PER_PO .tc_ov_weight = 1, #endif - .wrr_weights = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + .wrr_weights = {1, 1, 1, 1}, }, }; diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile index 644fd9d15b..3d7f410e12 100644 --- a/lib/librte_sched/Makefile +++ b/lib/librte_sched/Makefile @@ -18,7 +18,7 @@ LDLIBS += -lrte_timer EXPORT_MAP := rte_sched_version.map -LIBABIVER := 2 +LIBABIVER := 3 # # all source are stored in SRCS-y diff --git a/lib/librte_sched/meson.build b/lib/librte_sched/meson.build index 8e989e5f64..59d43c6d81 100644 --- a/lib/librte_sched/meson.build +++ b/lib/librte_sched/meson.build @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2017 Intel Corporation -version = 2 +version = 3 sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c') headers = files('rte_sched.h', 'rte_sched_common.h', 'rte_red.h', 'rte_approx.h') diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index bc06bc3f4e..1592c804b0 100644 --- a/lib/librte_sched/rte_sched.c +++ b/lib/librte_sched/rte_sched.c @@ -37,6 +37,7 @@ #define RTE_SCHED_TB_RATE_CONFIG_ERR (1e-7) #define RTE_SCHED_WRR_SHIFT 3 +#define RTE_SCHED_MAX_QUEUES_PER_TC RTE_SCHED_BE_QUEUES_PER_PIPE #define RTE_SCHED_GRINDER_PCACHE_SIZE (64 / RTE_SCHED_QUEUES_PER_PIPE) #define RTE_SCHED_PIPE_INVALID UINT32_MAX #define RTE_SCHED_BMP_POS_INVALID UINT32_MAX @@ -84,8 +85,8 @@ struct rte_sched_pipe_profile { uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; uint8_t tc_ov_weight; - /* Pipe queues */ - uint8_t wrr_cost[RTE_SCHED_QUEUES_PER_PIPE]; + /* Pipe best-effort traffic class queues */ + uint8_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE]; }; struct rte_sched_pipe { @@ -101,7 +102,7 @@ struct rte_sched_pipe { uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /* Weighted Round Robin (WRR) */ - uint8_t wrr_tokens[RTE_SCHED_QUEUES_PER_PIPE]; + uint8_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE]; /* TC oversubscription */ uint32_t tc_ov_credits; @@ -153,16 +154,16 @@ struct rte_sched_grinder { uint32_t tc_index; struct rte_sched_queue *queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; struct rte_mbuf **qbase[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; - uint32_t qindex[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + uint32_t qindex[RTE_SCHED_MAX_QUEUES_PER_TC]; uint16_t qsize; uint32_t qmask; uint32_t qpos; struct rte_mbuf *pkt; /* WRR */ - uint16_t wrr_tokens[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS]; - uint16_t wrr_mask[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS]; - uint8_t wrr_cost[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS]; + uint16_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE]; + uint16_t wrr_mask[RTE_SCHED_BE_QUEUES_PER_PIPE]; + uint8_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE]; }; struct rte_sched_port { @@ -483,7 +484,7 @@ rte_sched_port_log_pipe_profile(struct rte_sched_port *port, uint32_t i) " Token bucket: period = %u, credits per period = %u, size = %u\n" " Traffic classes: period = %u, credits per period = [%u, %u, %u, %u]\n" " Traffic class 3 oversubscription: weight = %hhu\n" - " WRR cost: [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu]\n", + " WRR cost: [%hhu, %hhu, %hhu, %hhu]\n", i, /* Token bucket */ @@ -502,10 +503,7 @@ rte_sched_port_log_pipe_profile(struct rte_sched_port *port, uint32_t i) p->tc_ov_weight, /* WRR */ - p->wrr_cost[ 0], p->wrr_cost[ 1], p->wrr_cost[ 2], p->wrr_cost[ 3], - p->wrr_cost[ 4], p->wrr_cost[ 5], p->wrr_cost[ 6], p->wrr_cost[ 7], - p->wrr_cost[ 8], p->wrr_cost[ 9], p->wrr_cost[10], p->wrr_cost[11], - p->wrr_cost[12], p->wrr_cost[13], p->wrr_cost[14], p->wrr_cost[15]); + p->wrr_cost[0], p->wrr_cost[1], p->wrr_cost[2], p->wrr_cost[3]); } static inline uint64_t @@ -523,6 +521,8 @@ rte_sched_pipe_profile_convert(struct rte_sched_pipe_params *src, struct rte_sched_pipe_profile *dst, uint32_t rate) { + uint32_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE]; + uint32_t lcd1, lcd2, lcd; uint32_t i; /* Token Bucket */ @@ -553,33 +553,25 @@ rte_sched_pipe_profile_convert(struct rte_sched_pipe_params *src, dst->tc_ov_weight = src->tc_ov_weight; #endif - /* WRR */ - for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { - uint32_t wrr_cost[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS]; - uint32_t lcd, lcd1, lcd2; - uint32_t qindex; + /* WRR queues */ + wrr_cost[0] = src->wrr_weights[0]; + wrr_cost[1] = src->wrr_weights[1]; + wrr_cost[2] = src->wrr_weights[2]; + wrr_cost[3] = src->wrr_weights[3]; - qindex = i * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; + lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]); + lcd2 = rte_get_lcd(wrr_cost[2], wrr_cost[3]); + lcd = rte_get_lcd(lcd1, lcd2); - wrr_cost[0] = src->wrr_weights[qindex]; - wrr_cost[1] = src->wrr_weights[qindex + 1]; - wrr_cost[2] = src->wrr_weights[qindex + 2]; - wrr_cost[3] = src->wrr_weights[qindex + 3]; + wrr_cost[0] = lcd / wrr_cost[0]; + wrr_cost[1] = lcd / wrr_cost[1]; + wrr_cost[2] = lcd / wrr_cost[2]; + wrr_cost[3] = lcd / wrr_cost[3]; - lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]); - lcd2 = rte_get_lcd(wrr_cost[2], wrr_cost[3]); - lcd = rte_get_lcd(lcd1, lcd2); - - wrr_cost[0] = lcd / wrr_cost[0]; - wrr_cost[1] = lcd / wrr_cost[1]; - wrr_cost[2] = lcd / wrr_cost[2]; - wrr_cost[3] = lcd / wrr_cost[3]; - - dst->wrr_cost[qindex] = (uint8_t) wrr_cost[0]; - dst->wrr_cost[qindex + 1] = (uint8_t) wrr_cost[1]; - dst->wrr_cost[qindex + 2] = (uint8_t) wrr_cost[2]; - dst->wrr_cost[qindex + 3] = (uint8_t) wrr_cost[3]; - } + dst->wrr_cost[0] = (uint8_t) wrr_cost[0]; + dst->wrr_cost[1] = (uint8_t) wrr_cost[1]; + dst->wrr_cost[2] = (uint8_t) wrr_cost[2]; + dst->wrr_cost[3] = (uint8_t) wrr_cost[3]; } static void @@ -1715,6 +1707,7 @@ grinder_schedule(struct rte_sched_port *port, uint32_t pos) struct rte_sched_queue *queue = grinder->queue[grinder->qpos]; struct rte_mbuf *pkt = grinder->pkt; uint32_t pkt_len = pkt->pkt_len + port->frame_overhead; + uint32_t be_tc_active; if (!grinder_credits_check(port, pos)) return 0; @@ -1725,13 +1718,18 @@ grinder_schedule(struct rte_sched_port *port, uint32_t pos) /* Send packet */ port->pkts_out[port->n_pkts_out++] = pkt; queue->qr++; - grinder->wrr_tokens[grinder->qpos] += pkt_len * grinder->wrr_cost[grinder->qpos]; + + be_tc_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE) ? ~0x0 : 0x0; + grinder->wrr_tokens[grinder->qpos] += + (pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active; + if (queue->qr == queue->qw) { uint32_t qindex = grinder->qindex[grinder->qpos]; rte_bitmap_clear(port->bmp, qindex); grinder->qmask &= ~(1 << grinder->qpos); - grinder->wrr_mask[grinder->qpos] = 0; + if (be_tc_active) + grinder->wrr_mask[grinder->qpos] = 0; rte_sched_port_set_queue_empty_timestamp(port, qindex); } @@ -1962,26 +1960,26 @@ grinder_wrr_load(struct rte_sched_port *port, uint32_t pos) struct rte_sched_grinder *grinder = port->grinder + pos; struct rte_sched_pipe *pipe = grinder->pipe; struct rte_sched_pipe_profile *pipe_params = grinder->pipe_params; - uint32_t tc_index = grinder->tc_index; uint32_t qmask = grinder->qmask; - uint32_t qindex; - qindex = tc_index * 4; - - grinder->wrr_tokens[0] = ((uint16_t) pipe->wrr_tokens[qindex]) << RTE_SCHED_WRR_SHIFT; - grinder->wrr_tokens[1] = ((uint16_t) pipe->wrr_tokens[qindex + 1]) << RTE_SCHED_WRR_SHIFT; - grinder->wrr_tokens[2] = ((uint16_t) pipe->wrr_tokens[qindex + 2]) << RTE_SCHED_WRR_SHIFT; - grinder->wrr_tokens[3] = ((uint16_t) pipe->wrr_tokens[qindex + 3]) << RTE_SCHED_WRR_SHIFT; + grinder->wrr_tokens[0] = + ((uint16_t) pipe->wrr_tokens[0]) << RTE_SCHED_WRR_SHIFT; + grinder->wrr_tokens[1] = + ((uint16_t) pipe->wrr_tokens[1]) << RTE_SCHED_WRR_SHIFT; + grinder->wrr_tokens[2] = + ((uint16_t) pipe->wrr_tokens[2]) << RTE_SCHED_WRR_SHIFT; + grinder->wrr_tokens[3] = + ((uint16_t) pipe->wrr_tokens[3]) << RTE_SCHED_WRR_SHIFT; grinder->wrr_mask[0] = (qmask & 0x1) * 0xFFFF; grinder->wrr_mask[1] = ((qmask >> 1) & 0x1) * 0xFFFF; grinder->wrr_mask[2] = ((qmask >> 2) & 0x1) * 0xFFFF; grinder->wrr_mask[3] = ((qmask >> 3) & 0x1) * 0xFFFF; - grinder->wrr_cost[0] = pipe_params->wrr_cost[qindex]; - grinder->wrr_cost[1] = pipe_params->wrr_cost[qindex + 1]; - grinder->wrr_cost[2] = pipe_params->wrr_cost[qindex + 2]; - grinder->wrr_cost[3] = pipe_params->wrr_cost[qindex + 3]; + grinder->wrr_cost[0] = pipe_params->wrr_cost[0]; + grinder->wrr_cost[1] = pipe_params->wrr_cost[1]; + grinder->wrr_cost[2] = pipe_params->wrr_cost[2]; + grinder->wrr_cost[3] = pipe_params->wrr_cost[3]; } static inline void @@ -1989,19 +1987,19 @@ grinder_wrr_store(struct rte_sched_port *port, uint32_t pos) { struct rte_sched_grinder *grinder = port->grinder + pos; struct rte_sched_pipe *pipe = grinder->pipe; - uint32_t tc_index = grinder->tc_index; - uint32_t qindex; - qindex = tc_index * 4; - - pipe->wrr_tokens[qindex] = (grinder->wrr_tokens[0] & grinder->wrr_mask[0]) - >> RTE_SCHED_WRR_SHIFT; - pipe->wrr_tokens[qindex + 1] = (grinder->wrr_tokens[1] & grinder->wrr_mask[1]) - >> RTE_SCHED_WRR_SHIFT; - pipe->wrr_tokens[qindex + 2] = (grinder->wrr_tokens[2] & grinder->wrr_mask[2]) - >> RTE_SCHED_WRR_SHIFT; - pipe->wrr_tokens[qindex + 3] = (grinder->wrr_tokens[3] & grinder->wrr_mask[3]) - >> RTE_SCHED_WRR_SHIFT; + pipe->wrr_tokens[0] = + (grinder->wrr_tokens[0] & grinder->wrr_mask[0]) >> + RTE_SCHED_WRR_SHIFT; + pipe->wrr_tokens[1] = + (grinder->wrr_tokens[1] & grinder->wrr_mask[1]) >> + RTE_SCHED_WRR_SHIFT; + pipe->wrr_tokens[2] = + (grinder->wrr_tokens[2] & grinder->wrr_mask[2]) >> + RTE_SCHED_WRR_SHIFT; + pipe->wrr_tokens[3] = + (grinder->wrr_tokens[3] & grinder->wrr_mask[3]) >> + RTE_SCHED_WRR_SHIFT; } static inline void @@ -2040,9 +2038,18 @@ static inline void grinder_prefetch_tc_queue_arrays(struct rte_sched_port *port, uint32_t pos) { struct rte_sched_grinder *grinder = port->grinder + pos; - uint16_t qsize, qr[4]; + uint16_t qsize, qr[RTE_SCHED_MAX_QUEUES_PER_TC]; qsize = grinder->qsize; + grinder->qpos = 0; + + if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) { + qr[0] = grinder->queue[0]->qr & (qsize - 1); + + rte_prefetch0(grinder->qbase[0] + qr[0]); + return; + } + qr[0] = grinder->queue[0]->qr & (qsize - 1); qr[1] = grinder->queue[1]->qr & (qsize - 1); qr[2] = grinder->queue[2]->qr & (qsize - 1); @@ -2118,18 +2125,24 @@ grinder_handle(struct rte_sched_port *port, uint32_t pos) case e_GRINDER_READ_MBUF: { - uint32_t result = 0; + uint32_t wrr_active, result = 0; result = grinder_schedule(port, pos); + wrr_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE); + /* Look for next packet within the same TC */ if (result && grinder->qmask) { - grinder_wrr(port, pos); + if (wrr_active) + grinder_wrr(port, pos); + grinder_prefetch_mbuf(port, pos); return 1; } - grinder_wrr_store(port, pos); + + if (wrr_active) + grinder_wrr_store(port, pos); /* Look for another active TC within same pipe */ if (grinder_next_tc(port, pos)) { diff --git a/lib/librte_sched/rte_sched.h b/lib/librte_sched/rte_sched.h index d61dda9f5f..662c3a0579 100644 --- a/lib/librte_sched/rte_sched.h +++ b/lib/librte_sched/rte_sched.h @@ -66,18 +66,35 @@ extern "C" { #include "rte_red.h" #endif +/** Maximum number of queues per pipe. + * Note that the multiple queues (power of 2) can only be assigned to + * lowest priority (best-effort) traffic class. Other higher priority traffic + * classes can only have one queue. + * Can not change. + * + * @see struct rte_sched_port_params + */ +#define RTE_SCHED_QUEUES_PER_PIPE 16 + +/** Number of WRR queues for best-effort traffic class per pipe. + * + * @see struct rte_sched_pipe_params + */ +#define RTE_SCHED_BE_QUEUES_PER_PIPE 4 + /** Number of traffic classes per pipe (as well as subport). * Cannot be changed. */ #define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE 4 +/** Best-effort traffic class ID + * Can not change. + */ +#define RTE_SCHED_TRAFFIC_CLASS_BE (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1) + /** Number of queues per pipe traffic class. Cannot be changed. */ #define RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS 4 -/** Number of queues per pipe. */ -#define RTE_SCHED_QUEUES_PER_PIPE \ - (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * \ - RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) /** Maximum number of pipe profiles that can be defined per port. * Compile-time configurable. @@ -165,7 +182,7 @@ struct rte_sched_pipe_params { #endif /* Pipe queues */ - uint8_t wrr_weights[RTE_SCHED_QUEUES_PER_PIPE]; /**< WRR weights */ + uint8_t wrr_weights[RTE_SCHED_BE_QUEUES_PER_PIPE]; /**< WRR weights */ }; /** Queue statistics */