Ruifeng Wang fe55802814 common/octeontx2: fix build with SVE
Building with gcc 10.2 with SVE extension enabled got error:

{standard input}: Assembler messages:
{standard input}:4002: Error: selected processor does not support `mov z3.b,#0'
{standard input}:4003: Error: selected processor does not support `whilelo p1.b,xzr,x7'
{standard input}:4005: Error: selected processor does not support `ld1b z0.b,p1/z,[x8]'
{standard input}:4006: Error: selected processor does not support `whilelo p4.s,wzr,w7'

This is because inline assembly code explicitly resets cpu model to
not have SVE support. Thus SVE instructions generated by compiler
auto vectorization got rejected by assembler.

Added SVE to the cpu model specified by inline assembly for SVE support.
Not replacing the inline assembly with C atomics because the driver relies
on specific LSE instruction to interface to co-processor [1].

Fixes: 8a4f835971f5 ("common/octeontx2: add IO handling APIs")
Cc: stable@dpdk.org

[1] https://mails.dpdk.org/archives/dev/2021-January/196092.html

Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Jerin Jacob <jerinj@marvell.com>
2021-01-14 16:42:25 +01:00

115 lines
2.6 KiB
C

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(C) 2019 Marvell International Ltd.
*/
#ifndef _OTX2_IO_ARM64_H_
#define _OTX2_IO_ARM64_H_
#define otx2_load_pair(val0, val1, addr) ({ \
asm volatile( \
"ldp %x[x0], %x[x1], [%x[p1]]" \
:[x0]"=r"(val0), [x1]"=r"(val1) \
:[p1]"r"(addr) \
); })
#define otx2_store_pair(val0, val1, addr) ({ \
asm volatile( \
"stp %x[x0], %x[x1], [%x[p1],#0]!" \
::[x0]"r"(val0), [x1]"r"(val1), [p1]"r"(addr) \
); })
#define otx2_prefetch_store_keep(ptr) ({\
asm volatile("prfm pstl1keep, [%x0]\n" : : "r" (ptr)); })
#if defined(__ARM_FEATURE_SVE)
#define __LSE_PREAMBLE " .cpu generic+lse+sve\n"
#else
#define __LSE_PREAMBLE " .cpu generic+lse\n"
#endif
static __rte_always_inline uint64_t
otx2_atomic64_add_nosync(int64_t incr, int64_t *ptr)
{
uint64_t result;
/* Atomic add with no ordering */
asm volatile (
__LSE_PREAMBLE
"ldadd %x[i], %x[r], [%[b]]"
: [r] "=r" (result), "+m" (*ptr)
: [i] "r" (incr), [b] "r" (ptr)
: "memory");
return result;
}
static __rte_always_inline uint64_t
otx2_atomic64_add_sync(int64_t incr, int64_t *ptr)
{
uint64_t result;
/* Atomic add with ordering */
asm volatile (
__LSE_PREAMBLE
"ldadda %x[i], %x[r], [%[b]]"
: [r] "=r" (result), "+m" (*ptr)
: [i] "r" (incr), [b] "r" (ptr)
: "memory");
return result;
}
static __rte_always_inline uint64_t
otx2_lmt_submit(rte_iova_t io_address)
{
uint64_t result;
asm volatile (
__LSE_PREAMBLE
"ldeor xzr,%x[rf],[%[rs]]" :
[rf] "=r"(result): [rs] "r"(io_address));
return result;
}
static __rte_always_inline uint64_t
otx2_lmt_submit_release(rte_iova_t io_address)
{
uint64_t result;
asm volatile (
__LSE_PREAMBLE
"ldeorl xzr,%x[rf],[%[rs]]" :
[rf] "=r"(result) : [rs] "r"(io_address));
return result;
}
static __rte_always_inline void
otx2_lmt_mov(void *out, const void *in, const uint32_t lmtext)
{
volatile const __uint128_t *src128 = (const __uint128_t *)in;
volatile __uint128_t *dst128 = (__uint128_t *)out;
dst128[0] = src128[0];
dst128[1] = src128[1];
/* lmtext receives following value:
* 1: NIX_SUBDC_EXT needed i.e. tx vlan case
* 2: NIX_SUBDC_EXT + NIX_SUBDC_MEM i.e. tstamp case
*/
if (lmtext) {
dst128[2] = src128[2];
if (lmtext > 1)
dst128[3] = src128[3];
}
}
static __rte_always_inline void
otx2_lmt_mov_seg(void *out, const void *in, const uint16_t segdw)
{
volatile const __uint128_t *src128 = (const __uint128_t *)in;
volatile __uint128_t *dst128 = (__uint128_t *)out;
uint8_t i;
for (i = 0; i < segdw; i++)
dst128[i] = src128[i];
}
#undef __LSE_PREAMBLE
#endif /* _OTX2_IO_ARM64_H_ */