event/octeontx2: simplify timer bucket estimation

Simplify timer bucket estimation: buckets need not be aligned to a
power of 2; instead, use reciprocal division to compute the modulo.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
This commit is contained in:
Pavan Nikhilesh 2021-03-23 14:14:35 +05:30 committed by Jerin Jacob
parent 64ea4ae178
commit 25b401c8b6
4 changed files with 61 additions and 139 deletions

View File

@ -34,27 +34,25 @@ tim_set_fp_ops(struct otx2_tim_ring *tim_ring)
{
uint8_t prod_flag = !tim_ring->prod_type_sp;
/* [MOD/AND] [DFB/FB] [SP][MP]*/
const rte_event_timer_arm_burst_t arm_burst[2][2][2][2] = {
#define FP(_name, _f4, _f3, _f2, _f1, flags) \
[_f4][_f3][_f2][_f1] = otx2_tim_arm_burst_ ## _name,
TIM_ARM_FASTPATH_MODES
/* [DFB/FB] [SP][MP]*/
const rte_event_timer_arm_burst_t arm_burst[2][2][2] = {
#define FP(_name, _f3, _f2, _f1, flags) \
[_f3][_f2][_f1] = otx2_tim_arm_burst_##_name,
TIM_ARM_FASTPATH_MODES
#undef FP
};
const rte_event_timer_arm_tmo_tick_burst_t arm_tmo_burst[2][2][2] = {
#define FP(_name, _f3, _f2, _f1, flags) \
[_f3][_f2][_f1] = otx2_tim_arm_tmo_tick_burst_ ## _name,
TIM_ARM_TMO_FASTPATH_MODES
const rte_event_timer_arm_tmo_tick_burst_t arm_tmo_burst[2][2] = {
#define FP(_name, _f2, _f1, flags) \
[_f2][_f1] = otx2_tim_arm_tmo_tick_burst_##_name,
TIM_ARM_TMO_FASTPATH_MODES
#undef FP
};
otx2_tim_ops.arm_burst =
arm_burst[tim_ring->enable_stats][tim_ring->optimized]
[tim_ring->ena_dfb][prod_flag];
arm_burst[tim_ring->enable_stats][tim_ring->ena_dfb][prod_flag];
otx2_tim_ops.arm_tmo_tick_burst =
arm_tmo_burst[tim_ring->enable_stats][tim_ring->optimized]
[tim_ring->ena_dfb];
arm_tmo_burst[tim_ring->enable_stats][tim_ring->ena_dfb];
otx2_tim_ops.cancel_burst = otx2_tim_timer_cancel_burst;
}
@ -71,51 +69,6 @@ otx2_tim_ring_info_get(const struct rte_event_timer_adapter *adptr,
sizeof(struct rte_event_timer_adapter_conf));
}
/*
 * Try to replace the ring's bucket count with a nearby aligned value.
 *
 * Two candidates are derived from tim_ring->nb_bkts: one through
 * rte_align32pow2() and one through rte_align32prevpow2(). A candidate is
 * discarded when the tick interval it implies (max_tout spread over
 * candidate - 1 buckets, rounded up to a multiple of 10) is below
 * TICK2NSEC(OTX2_TIM_MIN_TMO_TKS, tenns_clk_freq), or when the candidate
 * exceeds OTX2_TIM_MAX_BUCKETS.
 *
 * If both candidates are discarded the ring is left untouched. Otherwise
 * nb_bkts is set to the surviving candidate (the upper one only when it is
 * strictly closer to the requested count than the lower one), tck_nsec is
 * recomputed for the new count and the ring is flagged as optimized.
 */
static void
tim_optimze_bkt_param(struct otx2_tim_ring *tim_ring)
{
	uint32_t upper_bkts;
	uint32_t lower_bkts;
	uint64_t tick_ns;

	/* Evaluate the candidate produced by rte_align32pow2(). */
	upper_bkts = rte_align32pow2(tim_ring->nb_bkts);
	tick_ns = RTE_ALIGN_MUL_CEIL(tim_ring->max_tout / (upper_bkts - 1), 10);
	if ((tick_ns < TICK2NSEC(OTX2_TIM_MIN_TMO_TKS,
				 tim_ring->tenns_clk_freq) ||
	     upper_bkts > OTX2_TIM_MAX_BUCKETS))
		upper_bkts = 0;

	/* Evaluate the candidate produced by rte_align32prevpow2(). */
	lower_bkts = rte_align32prevpow2(tim_ring->nb_bkts);
	tick_ns = RTE_ALIGN_MUL_CEIL((tim_ring->max_tout / (lower_bkts - 1)), 10);
	if ((tick_ns < TICK2NSEC(OTX2_TIM_MIN_TMO_TKS,
				 tim_ring->tenns_clk_freq) ||
	     lower_bkts > OTX2_TIM_MAX_BUCKETS))
		lower_bkts = 0;

	/* A candidate set to zero above has been rejected. */
	if (!upper_bkts && !lower_bkts)
		return;

	if (!upper_bkts)
		tim_ring->nb_bkts = lower_bkts;
	else if (!lower_bkts)
		tim_ring->nb_bkts = upper_bkts;
	else
		/* Both usable: ties go to the lower candidate. */
		tim_ring->nb_bkts = (upper_bkts - tim_ring->nb_bkts) <
				(tim_ring->nb_bkts - lower_bkts) ?
				upper_bkts : lower_bkts;

	tim_ring->optimized = true;
	tim_ring->tck_nsec = RTE_ALIGN_MUL_CEIL((tim_ring->max_tout /
						(tim_ring->nb_bkts - 1)), 10);

	otx2_tim_dbg("Optimized configured values");
	otx2_tim_dbg("Nb_bkts : %" PRIu32 "", tim_ring->nb_bkts);
	otx2_tim_dbg("Tck_nsec : %" PRIu64 "", tim_ring->tck_nsec);
}
static int
tim_chnk_pool_create(struct otx2_tim_ring *tim_ring,
struct rte_event_timer_adapter_conf *rcfg)
@ -337,14 +290,6 @@ otx2_tim_ring_create(struct rte_event_timer_adapter *adptr)
tim_ring->chunk_sz);
tim_ring->nb_chunk_slots = OTX2_TIM_NB_CHUNK_SLOTS(tim_ring->chunk_sz);
/* Try to optimize the bucket parameters. */
if ((rcfg->flags & RTE_EVENT_TIMER_ADAPTER_F_ADJUST_RES)) {
if (rte_is_power_of_2(tim_ring->nb_bkts))
tim_ring->optimized = true;
else
tim_optimze_bkt_param(tim_ring);
}
if (tim_ring->disable_npa)
tim_ring->nb_chunks = tim_ring->nb_chunks * tim_ring->nb_bkts;
else
@ -477,6 +422,7 @@ otx2_tim_ring_start(const struct rte_event_timer_adapter *adptr)
tim_ring->tck_int = NSEC2TICK(tim_ring->tck_nsec, rte_get_timer_hz());
tim_ring->tot_int = tim_ring->tck_int * tim_ring->nb_bkts;
tim_ring->fast_div = rte_reciprocal_value_u64(tim_ring->tck_int);
tim_ring->fast_bkt = rte_reciprocal_value_u64(tim_ring->nb_bkts);
otx2_tim_calibrate_start_tsc(tim_ring);

View File

@ -76,8 +76,6 @@
#define OTX2_TIM_SP 0x1
#define OTX2_TIM_MP 0x2
#define OTX2_TIM_BKT_AND 0x4
#define OTX2_TIM_BKT_MOD 0x8
#define OTX2_TIM_ENA_FB 0x10
#define OTX2_TIM_ENA_DFB 0x20
#define OTX2_TIM_ENA_STATS 0x40
@ -149,11 +147,11 @@ struct otx2_tim_ring {
struct otx2_tim_bkt *bkt;
struct rte_mempool *chunk_pool;
struct rte_reciprocal_u64 fast_div;
struct rte_reciprocal_u64 fast_bkt;
uint64_t arm_cnt;
uint8_t prod_type_sp;
uint8_t enable_stats;
uint8_t disable_npa;
uint8_t optimized;
uint8_t ena_dfb;
uint8_t ena_periodic;
uint16_t ring_id;
@ -180,59 +178,37 @@ tim_priv_get(void)
}
#define TIM_ARM_FASTPATH_MODES \
FP(mod_sp, 0, 0, 0, 0, OTX2_TIM_BKT_MOD | OTX2_TIM_ENA_DFB | OTX2_TIM_SP) \
FP(mod_mp, 0, 0, 0, 1, OTX2_TIM_BKT_MOD | OTX2_TIM_ENA_DFB | OTX2_TIM_MP) \
FP(mod_fb_sp, 0, 0, 1, 0, OTX2_TIM_BKT_MOD | OTX2_TIM_ENA_FB | OTX2_TIM_SP) \
FP(mod_fb_mp, 0, 0, 1, 1, OTX2_TIM_BKT_MOD | OTX2_TIM_ENA_FB | OTX2_TIM_MP) \
FP(and_sp, 0, 1, 0, 0, OTX2_TIM_BKT_AND | OTX2_TIM_ENA_DFB | OTX2_TIM_SP) \
FP(and_mp, 0, 1, 0, 1, OTX2_TIM_BKT_AND | OTX2_TIM_ENA_DFB | OTX2_TIM_MP) \
FP(and_fb_sp, 0, 1, 1, 0, OTX2_TIM_BKT_AND | OTX2_TIM_ENA_FB | OTX2_TIM_SP) \
FP(and_fb_mp, 0, 1, 1, 1, OTX2_TIM_BKT_AND | OTX2_TIM_ENA_FB | OTX2_TIM_MP) \
FP(stats_mod_sp, 1, 0, 0, 0, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_MOD | \
OTX2_TIM_ENA_DFB | OTX2_TIM_SP) \
FP(stats_mod_mp, 1, 0, 0, 1, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_MOD | \
OTX2_TIM_ENA_DFB | OTX2_TIM_MP) \
FP(stats_mod_fb_sp, 1, 0, 1, 0, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_MOD | \
OTX2_TIM_ENA_FB | OTX2_TIM_SP) \
FP(stats_mod_fb_mp, 1, 0, 1, 1, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_MOD | \
OTX2_TIM_ENA_FB | OTX2_TIM_MP) \
FP(stats_and_sp, 1, 1, 0, 0, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_AND | \
OTX2_TIM_ENA_DFB | OTX2_TIM_SP) \
FP(stats_and_mp, 1, 1, 0, 1, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_AND | \
OTX2_TIM_ENA_DFB | OTX2_TIM_MP) \
FP(stats_and_fb_sp, 1, 1, 1, 0, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_AND | \
OTX2_TIM_ENA_FB | OTX2_TIM_SP) \
FP(stats_and_fb_mp, 1, 1, 1, 1, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_AND | \
OTX2_TIM_ENA_FB | OTX2_TIM_MP)
FP(sp, 0, 0, 0, OTX2_TIM_ENA_DFB | OTX2_TIM_SP) \
FP(mp, 0, 0, 1, OTX2_TIM_ENA_DFB | OTX2_TIM_MP) \
FP(fb_sp, 0, 1, 0, OTX2_TIM_ENA_FB | OTX2_TIM_SP) \
FP(fb_mp, 0, 1, 1, OTX2_TIM_ENA_FB | OTX2_TIM_MP) \
FP(stats_mod_sp, 1, 0, 0, \
OTX2_TIM_ENA_STATS | OTX2_TIM_ENA_DFB | OTX2_TIM_SP) \
FP(stats_mod_mp, 1, 0, 1, \
OTX2_TIM_ENA_STATS | OTX2_TIM_ENA_DFB | OTX2_TIM_MP) \
FP(stats_mod_fb_sp, 1, 1, 0, \
OTX2_TIM_ENA_STATS | OTX2_TIM_ENA_FB | OTX2_TIM_SP) \
FP(stats_mod_fb_mp, 1, 1, 1, \
OTX2_TIM_ENA_STATS | OTX2_TIM_ENA_FB | OTX2_TIM_MP)
#define TIM_ARM_TMO_FASTPATH_MODES \
FP(mod, 0, 0, 0, OTX2_TIM_BKT_MOD | OTX2_TIM_ENA_DFB) \
FP(mod_fb, 0, 0, 1, OTX2_TIM_BKT_MOD | OTX2_TIM_ENA_FB) \
FP(and, 0, 1, 0, OTX2_TIM_BKT_AND | OTX2_TIM_ENA_DFB) \
FP(and_fb, 0, 1, 1, OTX2_TIM_BKT_AND | OTX2_TIM_ENA_FB) \
FP(stats_mod, 1, 0, 0, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_MOD | \
OTX2_TIM_ENA_DFB) \
FP(stats_mod_fb, 1, 0, 1, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_MOD | \
OTX2_TIM_ENA_FB) \
FP(stats_and, 1, 1, 0, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_AND | \
OTX2_TIM_ENA_DFB) \
FP(stats_and_fb, 1, 1, 1, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_AND | \
OTX2_TIM_ENA_FB)
FP(dfb, 0, 0, OTX2_TIM_ENA_DFB) \
FP(fb, 0, 1, OTX2_TIM_ENA_FB) \
FP(stats_dfb, 1, 0, OTX2_TIM_ENA_STATS | OTX2_TIM_ENA_DFB) \
FP(stats_fb, 1, 1, OTX2_TIM_ENA_STATS | OTX2_TIM_ENA_FB)
#define FP(_name, _f4, _f3, _f2, _f1, flags) \
uint16_t \
otx2_tim_arm_burst_ ## _name(const struct rte_event_timer_adapter *adptr, \
struct rte_event_timer **tim, \
const uint16_t nb_timers);
#define FP(_name, _f3, _f2, _f1, flags) \
uint16_t otx2_tim_arm_burst_##_name( \
const struct rte_event_timer_adapter *adptr, \
struct rte_event_timer **tim, const uint16_t nb_timers);
TIM_ARM_FASTPATH_MODES
#undef FP
#define FP(_name, _f3, _f2, _f1, flags) \
uint16_t \
otx2_tim_arm_tmo_tick_burst_ ## _name( \
#define FP(_name, _f2, _f1, flags) \
uint16_t otx2_tim_arm_tmo_tick_burst_##_name( \
const struct rte_event_timer_adapter *adptr, \
struct rte_event_timer **tim, \
const uint64_t timeout_tick, const uint16_t nb_timers);
struct rte_event_timer **tim, const uint64_t timeout_tick, \
const uint16_t nb_timers);
TIM_ARM_TMO_FASTPATH_MODES
#undef FP

View File

@ -136,7 +136,7 @@ tim_timer_arm_tmo_brst(const struct rte_event_timer_adapter *adptr,
return set_timers;
}
#define FP(_name, _f4, _f3, _f2, _f1, _flags) \
#define FP(_name, _f3, _f2, _f1, _flags) \
uint16_t __rte_noinline \
otx2_tim_arm_burst_ ## _name(const struct rte_event_timer_adapter *adptr, \
struct rte_event_timer **tim, \
@ -147,7 +147,7 @@ otx2_tim_arm_burst_ ## _name(const struct rte_event_timer_adapter *adptr, \
TIM_ARM_FASTPATH_MODES
#undef FP
#define FP(_name, _f3, _f2, _f1, _flags) \
#define FP(_name, _f2, _f1, _flags) \
uint16_t __rte_noinline \
otx2_tim_arm_tmo_tick_burst_ ## _name( \
const struct rte_event_timer_adapter *adptr, \

View File

@ -115,27 +115,27 @@ tim_bkt_clr_nent(struct otx2_tim_bkt *bktp)
return __atomic_and_fetch(&bktp->w1, v, __ATOMIC_ACQ_REL);
}
/*
 * Remainder helper built on reciprocal division.
 *
 * Obtains a quotient for n from rte_reciprocal_divide_u64() using R and
 * returns n minus d times that quotient.
 * NOTE(review): the result equals n % d only if R was generated for d
 * (e.g. via rte_reciprocal_value_u64(d)) - confirm at call sites.
 */
static inline uint64_t
tim_bkt_fast_mod(uint64_t n, uint64_t d, struct rte_reciprocal_u64 R)
{
	const uint64_t quotient = rte_reciprocal_divide_u64(n, &R);

	return n - d * quotient;
}
static __rte_always_inline void
tim_get_target_bucket(struct otx2_tim_ring * const tim_ring,
tim_get_target_bucket(struct otx2_tim_ring *const tim_ring,
const uint32_t rel_bkt, struct otx2_tim_bkt **bkt,
struct otx2_tim_bkt **mirr_bkt, const uint8_t flag)
struct otx2_tim_bkt **mirr_bkt)
{
const uint64_t bkt_cyc = rte_rdtsc() - tim_ring->ring_start_cyc;
uint32_t bucket = rte_reciprocal_divide_u64(bkt_cyc,
&tim_ring->fast_div) + rel_bkt;
uint32_t mirr_bucket = 0;
if (flag & OTX2_TIM_BKT_MOD) {
bucket = bucket % tim_ring->nb_bkts;
mirr_bucket = (bucket + (tim_ring->nb_bkts >> 1)) %
tim_ring->nb_bkts;
}
if (flag & OTX2_TIM_BKT_AND) {
bucket = bucket & (tim_ring->nb_bkts - 1);
mirr_bucket = (bucket + (tim_ring->nb_bkts >> 1)) &
(tim_ring->nb_bkts - 1);
}
uint64_t bucket =
rte_reciprocal_divide_u64(bkt_cyc, &tim_ring->fast_div) +
rel_bkt;
uint64_t mirr_bucket = 0;
bucket =
tim_bkt_fast_mod(bucket, tim_ring->nb_bkts, tim_ring->fast_bkt);
mirr_bucket = tim_bkt_fast_mod(bucket + (tim_ring->nb_bkts >> 1),
tim_ring->nb_bkts, tim_ring->fast_bkt);
*bkt = &tim_ring->bkt[bucket];
*mirr_bkt = &tim_ring->bkt[mirr_bucket];
}
@ -236,7 +236,7 @@ tim_add_entry_sp(struct otx2_tim_ring * const tim_ring,
int16_t rem;
__retry:
tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt, flags);
tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt);
/* Get Bucket sema*/
lock_sema = tim_bkt_fetch_sema_lock(bkt);
@ -322,7 +322,7 @@ tim_add_entry_mp(struct otx2_tim_ring * const tim_ring,
int16_t rem;
__retry:
tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt, flags);
tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt);
/* Get Bucket sema*/
lock_sema = tim_bkt_fetch_sema_lock(bkt);
@ -454,7 +454,7 @@ tim_add_entry_brst(struct otx2_tim_ring * const tim_ring,
uint8_t lock_cnt;
__retry:
tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt, flags);
tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt);
/* Only one thread beyond this. */
lock_sema = tim_bkt_inc_lock(bkt);