bus/fslmc: use CINH read on LS1088 platform

On the LS1088 platform, CENA operations cause issues
at high load. CINH (cache-inhibited) mode works
fine, with only a minor performance impact.

This patch selectively enables CINH mode on the LS1088 platform.

Signed-off-by: Nipun Gupta <nipun.gupta@nxp.com>
This commit is contained in:
Nipun Gupta 2019-07-15 14:14:41 +05:30 committed by Thomas Monjalon
parent cb07a0883c
commit 63d5d0af4f
5 changed files with 303 additions and 13 deletions

View File

@ -28,8 +28,6 @@ RTE_DECLARE_PER_LCORE(struct dpaa2_io_portal_t, _dpaa2_io);
#define DPAA2_PER_LCORE_ETHRX_DPIO RTE_PER_LCORE(_dpaa2_io).ethrx_dpio_dev
#define DPAA2_PER_LCORE_ETHRX_PORTAL DPAA2_PER_LCORE_ETHRX_DPIO->sw_portal
/* Variable to store DPAA2 platform type */
extern uint32_t dpaa2_svr_family;
/* Variable to store DPAA2 DQRR size */
extern uint8_t dpaa2_dqrr_size;
/* Variable to store DPAA2 EQCR size */

View File

@ -23,11 +23,6 @@
#define lower_32_bits(x) ((uint32_t)(x))
#define upper_32_bits(x) ((uint32_t)(((x) >> 16) >> 16))
#define SVR_LS1080A 0x87030000
#define SVR_LS2080A 0x87010000
#define SVR_LS2088A 0x87090000
#define SVR_LX2160A 0x87360000
#ifndef VLAN_TAG_SIZE
#define VLAN_TAG_SIZE 4 /** < Vlan Header Length */
#endif

View File

@ -1,6 +1,7 @@
/* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (C) 2014 Freescale Semiconductor, Inc.
* Copyright 2015-2019 NXP
*
*/
#ifndef _FSL_QBMAN_PORTAL_H
@ -8,6 +9,14 @@
#include <fsl_qbman_base.h>
#define SVR_LS1080A 0x87030000
#define SVR_LS2080A 0x87010000
#define SVR_LS2088A 0x87090000
#define SVR_LX2160A 0x87360000
/* Variable to store DPAA2 platform type */
extern uint32_t dpaa2_svr_family;
/**
* DOC - QBMan portal APIs to implement the following functions:
* - Initialize and destroy Software portal object.

View File

@ -76,6 +76,10 @@ qbman_swp_enqueue_ring_mode_direct(struct qbman_swp *s,
const struct qbman_eq_desc *d,
const struct qbman_fd *fd);
/* Forward declaration: single-frame enqueue variant that refreshes the EQCR
 * consumer index with a CINH register read. Defined later in this file.
 */
static int
qbman_swp_enqueue_ring_mode_cinh_direct(struct qbman_swp *s,
const struct qbman_eq_desc *d,
const struct qbman_fd *fd);
static int
qbman_swp_enqueue_ring_mode_mem_back(struct qbman_swp *s,
const struct qbman_eq_desc *d,
const struct qbman_fd *fd);
@ -87,6 +91,12 @@ qbman_swp_enqueue_multiple_direct(struct qbman_swp *s,
uint32_t *flags,
int num_frames);
/* Forward declaration: burst enqueue of an array of frame descriptors sharing
 * one enqueue descriptor, with the EQCR consumer index refreshed by a CINH
 * register read. Defined later in this file.
 */
static int
qbman_swp_enqueue_multiple_cinh_direct(struct qbman_swp *s,
const struct qbman_eq_desc *d,
const struct qbman_fd *fd,
uint32_t *flags,
int num_frames);
static int
qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
const struct qbman_eq_desc *d,
const struct qbman_fd *fd,
@ -99,7 +109,12 @@ qbman_swp_enqueue_multiple_fd_direct(struct qbman_swp *s,
struct qbman_fd **fd,
uint32_t *flags,
int num_frames);
/* Forward declaration: burst enqueue taking an array of frame-descriptor
 * pointers, with the EQCR consumer index refreshed by a CINH register read.
 * Defined later in this file.
 */
static int
qbman_swp_enqueue_multiple_fd_cinh_direct(struct qbman_swp *s,
const struct qbman_eq_desc *d,
struct qbman_fd **fd,
uint32_t *flags,
int num_frames);
static int
qbman_swp_enqueue_multiple_fd_mem_back(struct qbman_swp *s,
const struct qbman_eq_desc *d,
@ -113,6 +128,11 @@ qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s,
const struct qbman_fd *fd,
int num_frames);
/* Forward declaration: burst enqueue where each frame descriptor has its own
 * enqueue descriptor (d[i] pairs with fd[i]), with the EQCR consumer index
 * refreshed by a CINH register read. Defined later in this file.
 */
static int
qbman_swp_enqueue_multiple_desc_cinh_direct(struct qbman_swp *s,
const struct qbman_eq_desc *d,
const struct qbman_fd *fd,
int num_frames);
static int
qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
const struct qbman_eq_desc *d,
const struct qbman_fd *fd,
@ -273,6 +293,17 @@ struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d)
qbman_swp_release_ptr = qbman_swp_release_mem_back;
}
if (dpaa2_svr_family == SVR_LS1080A) {
qbman_swp_enqueue_ring_mode_ptr =
qbman_swp_enqueue_ring_mode_cinh_direct;
qbman_swp_enqueue_multiple_ptr =
qbman_swp_enqueue_multiple_cinh_direct;
qbman_swp_enqueue_multiple_fd_ptr =
qbman_swp_enqueue_multiple_fd_cinh_direct;
qbman_swp_enqueue_multiple_desc_ptr =
qbman_swp_enqueue_multiple_desc_cinh_direct;
}
for (mask_size = p->eqcr.pi_ring_size; mask_size > 0; mask_size >>= 1)
p->eqcr.pi_ci_mask = (p->eqcr.pi_ci_mask<<1) + 1;
eqcr_pi = qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_EQCR_PI);
@ -700,6 +731,46 @@ static int qbman_swp_enqueue_ring_mode_direct(struct qbman_swp *s,
return 0;
}
/*
 * Enqueue a single frame descriptor on the EQCR ring, using a CINH
 * (cache-inhibited) register read to refresh the consumer index.
 *
 * s:  software portal whose EQCR state (pi, ci, pi_vb, available) is updated
 * d:  enqueue descriptor whose words are copied into the ring entry
 * fd: frame descriptor copied into the ring entry
 *
 * Returns 0 on success, or -EBUSY when no EQCR entry is available even after
 * re-reading the consumer index.
 */
static int qbman_swp_enqueue_ring_mode_cinh_direct(
struct qbman_swp *s,
const struct qbman_eq_desc *d,
const struct qbman_fd *fd)
{
uint32_t *p;
const uint32_t *cl = qb_cl(d);
uint32_t eqcr_ci, full_mask, half_mask;
/* half_mask selects the ring slot; full_mask (one bit wider) bounds pi/ci */
half_mask = (s->eqcr.pi_ci_mask>>1);
full_mask = s->eqcr.pi_ci_mask;
/*
 * No cached free entries: re-read the consumer index through the CINH
 * register and recompute availability from the old and new CI values
 * (qm_cyc_diff is presumably a cyclic difference over the ring size).
 */
if (!s->eqcr.available) {
eqcr_ci = s->eqcr.ci;
s->eqcr.ci = qbman_cinh_read(&s->sys,
QBMAN_CINH_SWP_EQCR_CI) & full_mask;
s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
eqcr_ci, s->eqcr.ci);
if (!s->eqcr.available)
return -EBUSY;
}
p = qbman_cena_write_start_wo_shadow(&s->sys,
QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask));
/* Copy 28 bytes of the descriptor starting at word 1; word 0 is written last */
memcpy(&p[1], &cl[1], 28);
/* The frame descriptor is placed at word 8 of the ring entry */
memcpy(&p[8], fd, sizeof(*fd));
/* Barrier before the verb word is stored (presumably so the payload lands first) */
lwsync();
/* Set the verb byte, have to substitute in the valid-bit */
p[0] = cl[0] | s->eqcr.pi_vb;
qbman_cena_write_complete_wo_shadow(&s->sys,
QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask));
/* Advance the producer index and consume one cached free entry */
s->eqcr.pi++;
s->eqcr.pi &= full_mask;
s->eqcr.available--;
/* Slot index wrapped to 0: flip the valid bit used for subsequent entries */
if (!(s->eqcr.pi & half_mask))
s->eqcr.pi_vb ^= QB_VALID_BIT;
return 0;
}
static int qbman_swp_enqueue_ring_mode_mem_back(struct qbman_swp *s,
const struct qbman_eq_desc *d,
const struct qbman_fd *fd)
@ -823,6 +894,76 @@ static int qbman_swp_enqueue_multiple_direct(struct qbman_swp *s,
return num_enqueued;
}
/*
 * Enqueue up to num_frames frame descriptors from the array fd, all sharing
 * the single enqueue descriptor d. The EQCR consumer index is refreshed with
 * a CINH (cache-inhibited) register read when no free entries are cached.
 *
 * s:          software portal whose EQCR state is updated
 * d:          enqueue descriptor applied to every frame
 * fd:         array of at least num_frames frame descriptors
 * flags:      optional per-frame flags (may be NULL); when
 *             QBMAN_ENQUEUE_FLAG_DCA is set for a frame, the DCA field of its
 *             ring entry is filled in from the flag value
 * num_frames: number of frames the caller wants to enqueue
 *
 * Returns the number of frames actually enqueued, which is limited by the
 * number of available EQCR entries; 0 when the ring is full.
 */
static int qbman_swp_enqueue_multiple_cinh_direct(
		struct qbman_swp *s,
		const struct qbman_eq_desc *d,
		const struct qbman_fd *fd,
		uint32_t *flags,
		int num_frames)
{
	uint32_t *p = NULL;
	const uint32_t *cl = qb_cl(d);
	uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
	int i, num_enqueued = 0;
	uint64_t addr_cena;

	/* half_mask selects the ring slot; full_mask bounds pi/ci */
	half_mask = (s->eqcr.pi_ci_mask>>1);
	full_mask = s->eqcr.pi_ci_mask;

	/* No cached free entries: re-read the consumer index via CINH */
	if (!s->eqcr.available) {
		eqcr_ci = s->eqcr.ci;
		s->eqcr.ci = qbman_cinh_read(&s->sys,
				QBMAN_CINH_SWP_EQCR_CI) & full_mask;
		s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
				eqcr_ci, s->eqcr.ci);
		if (!s->eqcr.available)
			return 0;
	}

	eqcr_pi = s->eqcr.pi;
	num_enqueued = (s->eqcr.available < num_frames) ?
			s->eqcr.available : num_frames;
	s->eqcr.available -= num_enqueued;

	/* Fill in the EQCR ring */
	for (i = 0; i < num_enqueued; i++) {
		p = qbman_cena_write_start_wo_shadow(&s->sys,
				QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
		/* Word 0 (verb) is written in the second pass below */
		memcpy(&p[1], &cl[1], 28);
		memcpy(&p[8], &fd[i], sizeof(*fd));
		eqcr_pi++;
	}

	lwsync();

	/* Set the verb byte, have to substitute in the valid-bit */
	eqcr_pi = s->eqcr.pi;
	for (i = 0; i < num_enqueued; i++) {
		p = qbman_cena_write_start_wo_shadow(&s->sys,
				QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
		p[0] = cl[0] | s->eqcr.pi_vb;
		if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
			/*
			 * View the ring entry as a descriptor; named
			 * distinctly so it does not shadow parameter d.
			 */
			struct qbman_eq_desc *eq_desc =
				(struct qbman_eq_desc *)p;

			eq_desc->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) |
				((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
		}
		eqcr_pi++;
		/* Slot index wrapped to 0: flip the valid bit */
		if (!(eqcr_pi & half_mask))
			s->eqcr.pi_vb ^= QB_VALID_BIT;
	}

	/* Flush all the cacheline without load/store in between */
	eqcr_pi = s->eqcr.pi;
	addr_cena = (size_t)s->sys.addr_cena;
	for (i = 0; i < num_enqueued; i++) {
		dcbf(addr_cena +
			QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
		eqcr_pi++;
	}
	s->eqcr.pi = eqcr_pi & full_mask;

	return num_enqueued;
}
static int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
const struct qbman_eq_desc *d,
const struct qbman_fd *fd,
@ -954,6 +1095,76 @@ static int qbman_swp_enqueue_multiple_fd_direct(struct qbman_swp *s,
return num_enqueued;
}
/*
 * Enqueue up to num_frames frame descriptors given as an array of pointers,
 * all sharing the single enqueue descriptor d. The EQCR consumer index is
 * refreshed with a CINH (cache-inhibited) register read when no free entries
 * are cached.
 *
 * s:          software portal whose EQCR state is updated
 * d:          enqueue descriptor applied to every frame
 * fd:         array of at least num_frames pointers to frame descriptors
 * flags:      optional per-frame flags (may be NULL); when
 *             QBMAN_ENQUEUE_FLAG_DCA is set for a frame, the DCA field of its
 *             ring entry is filled in from the flag value
 * num_frames: number of frames the caller wants to enqueue
 *
 * Returns the number of frames actually enqueued, which is limited by the
 * number of available EQCR entries; 0 when the ring is full.
 */
static int qbman_swp_enqueue_multiple_fd_cinh_direct(
		struct qbman_swp *s,
		const struct qbman_eq_desc *d,
		struct qbman_fd **fd,
		uint32_t *flags,
		int num_frames)
{
	uint32_t *p = NULL;
	const uint32_t *cl = qb_cl(d);
	uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
	int i, num_enqueued = 0;
	uint64_t addr_cena;

	/* half_mask selects the ring slot; full_mask bounds pi/ci */
	half_mask = (s->eqcr.pi_ci_mask>>1);
	full_mask = s->eqcr.pi_ci_mask;

	/* No cached free entries: re-read the consumer index via CINH */
	if (!s->eqcr.available) {
		eqcr_ci = s->eqcr.ci;
		s->eqcr.ci = qbman_cinh_read(&s->sys,
				QBMAN_CINH_SWP_EQCR_CI) & full_mask;
		s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
				eqcr_ci, s->eqcr.ci);
		if (!s->eqcr.available)
			return 0;
	}

	eqcr_pi = s->eqcr.pi;
	num_enqueued = (s->eqcr.available < num_frames) ?
			s->eqcr.available : num_frames;
	s->eqcr.available -= num_enqueued;

	/* Fill in the EQCR ring */
	for (i = 0; i < num_enqueued; i++) {
		p = qbman_cena_write_start_wo_shadow(&s->sys,
				QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
		/* Word 0 (verb) is written in the second pass below */
		memcpy(&p[1], &cl[1], 28);
		memcpy(&p[8], fd[i], sizeof(struct qbman_fd));
		eqcr_pi++;
	}

	lwsync();

	/* Set the verb byte, have to substitute in the valid-bit */
	eqcr_pi = s->eqcr.pi;
	for (i = 0; i < num_enqueued; i++) {
		p = qbman_cena_write_start_wo_shadow(&s->sys,
				QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
		p[0] = cl[0] | s->eqcr.pi_vb;
		if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
			/*
			 * View the ring entry as a descriptor; named
			 * distinctly so it does not shadow parameter d.
			 */
			struct qbman_eq_desc *eq_desc =
				(struct qbman_eq_desc *)p;

			eq_desc->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) |
				((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
		}
		eqcr_pi++;
		/* Slot index wrapped to 0: flip the valid bit */
		if (!(eqcr_pi & half_mask))
			s->eqcr.pi_vb ^= QB_VALID_BIT;
	}

	/* Flush all the cacheline without load/store in between */
	eqcr_pi = s->eqcr.pi;
	addr_cena = (size_t)s->sys.addr_cena;
	for (i = 0; i < num_enqueued; i++) {
		dcbf(addr_cena +
			QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
		eqcr_pi++;
	}
	s->eqcr.pi = eqcr_pi & full_mask;

	return num_enqueued;
}
static int qbman_swp_enqueue_multiple_fd_mem_back(struct qbman_swp *s,
const struct qbman_eq_desc *d,
struct qbman_fd **fd,
@ -1087,6 +1298,71 @@ static int qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s,
return num_enqueued;
}
/*
 * Enqueue up to num_frames frame descriptors, each with its own enqueue
 * descriptor (d[i] is used for fd[i]). The EQCR consumer index is refreshed
 * with a CINH (cache-inhibited) register read when no free entries are cached.
 *
 * s:          software portal whose EQCR state is updated
 * d:          array of at least num_frames enqueue descriptors
 * fd:         array of at least num_frames frame descriptors
 * num_frames: number of frames the caller wants to enqueue
 *
 * Returns the number of frames actually enqueued, which is limited by the
 * number of available EQCR entries; 0 when the ring is full.
 */
static int qbman_swp_enqueue_multiple_desc_cinh_direct(
struct qbman_swp *s,
const struct qbman_eq_desc *d,
const struct qbman_fd *fd,
int num_frames)
{
uint32_t *p;
const uint32_t *cl;
uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
int i, num_enqueued = 0;
uint64_t addr_cena;
/* half_mask selects the ring slot; full_mask (one bit wider) bounds pi/ci */
half_mask = (s->eqcr.pi_ci_mask>>1);
full_mask = s->eqcr.pi_ci_mask;
/* No cached free entries: re-read the consumer index through the CINH register */
if (!s->eqcr.available) {
eqcr_ci = s->eqcr.ci;
s->eqcr.ci = qbman_cinh_read(&s->sys,
QBMAN_CINH_SWP_EQCR_CI) & full_mask;
s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
eqcr_ci, s->eqcr.ci);
if (!s->eqcr.available)
return 0;
}
eqcr_pi = s->eqcr.pi;
/* Clamp the burst to the number of free entries */
num_enqueued = (s->eqcr.available < num_frames) ?
s->eqcr.available : num_frames;
s->eqcr.available -= num_enqueued;
/* Fill in the EQCR ring */
for (i = 0; i < num_enqueued; i++) {
p = qbman_cena_write_start_wo_shadow(&s->sys,
QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
cl = qb_cl(&d[i]);
/* Word 0 (verb) is left for the second pass below */
memcpy(&p[1], &cl[1], 28);
memcpy(&p[8], &fd[i], sizeof(*fd));
eqcr_pi++;
}
/* Barrier between the payload pass and the verb pass */
lwsync();
/* Set the verb byte, have to substitute in the valid-bit */
eqcr_pi = s->eqcr.pi;
for (i = 0; i < num_enqueued; i++) {
p = qbman_cena_write_start_wo_shadow(&s->sys,
QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
cl = qb_cl(&d[i]);
p[0] = cl[0] | s->eqcr.pi_vb;
eqcr_pi++;
/* Slot index wrapped to 0: flip the valid bit for the following entries */
if (!(eqcr_pi & half_mask))
s->eqcr.pi_vb ^= QB_VALID_BIT;
}
/* Flush all the cacheline without load/store in between */
eqcr_pi = s->eqcr.pi;
addr_cena = (size_t)s->sys.addr_cena;
for (i = 0; i < num_enqueued; i++) {
dcbf(addr_cena +
QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
eqcr_pi++;
}
/* Publish the advanced producer index into the portal state */
s->eqcr.pi = eqcr_pi & full_mask;
return num_enqueued;
}
static int qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
const struct qbman_eq_desc *d,
const struct qbman_fd *fd,

View File

@ -381,6 +381,14 @@ static inline uint32_t qbman_set_swp_cfg(uint8_t max_fill, uint8_t wn,
#define QMAN_REV_5000 0x05000000
#define QMAN_REV_MASK 0xffff0000
#define SVR_LS1080A 0x87030000
#define SVR_LS2080A 0x87010000
#define SVR_LS2088A 0x87090000
#define SVR_LX2160A 0x87360000
/* Variable to store DPAA2 platform type */
extern uint32_t dpaa2_svr_family;
static inline int qbman_swp_sys_init(struct qbman_swp_sys *s,
const struct qbman_swp_desc *d,
uint8_t dqrr_size)
@ -388,16 +396,17 @@ static inline int qbman_swp_sys_init(struct qbman_swp_sys *s,
uint32_t reg;
int i;
int cena_region_size = 4*1024;
if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
&& (d->cena_access_mode == qman_cena_fastest_access))
cena_region_size = 64*1024;
uint8_t est = 1;
#ifdef RTE_ARCH_64
uint8_t wn = CENA_WRITE_ENABLE;
#else
uint8_t wn = CINH_WRITE_ENABLE;
#endif
if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
&& (d->cena_access_mode == qman_cena_fastest_access))
cena_region_size = 64*1024;
s->addr_cena = d->cena_bar;
s->addr_cinh = d->cinh_bar;
s->idx = (uint32_t)d->idx;
@ -428,6 +437,9 @@ static inline int qbman_swp_sys_init(struct qbman_swp_sys *s,
dccivac(s->addr_cena + i);
}
if (dpaa2_svr_family == SVR_LS1080A)
est = 0;
if (s->eqcr_mode == qman_eqcr_vb_array) {
reg = qbman_set_swp_cfg(dqrr_size, wn,
0, 3, 2, 3, 1, 1, 1, 1, 1, 1);
@ -438,7 +450,7 @@ static inline int qbman_swp_sys_init(struct qbman_swp_sys *s,
1, 3, 2, 0, 1, 1, 1, 1, 1, 1);
else
reg = qbman_set_swp_cfg(dqrr_size, wn,
1, 3, 2, 2, 1, 1, 1, 1, 1, 1);
est, 3, 2, 2, 1, 1, 1, 1, 1, 1);
}
if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000