common/cnxk: support NPA bulk alloc/free
Add APIs to alloc/free in bulk from NPA pool. Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com> Acked-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
This commit is contained in:
parent
f765f56112
commit
2409f53a27
@ -8,6 +8,11 @@
|
||||
#define ROC_AURA_ID_MASK (BIT_ULL(16) - 1)
|
||||
#define ROC_AURA_OP_LIMIT_MASK (BIT_ULL(36) - 1)
|
||||
|
||||
/* 16 CASP instructions can be outstanding in CN9k, but we use only 15
|
||||
* outstanding CASPs as we run out of registers.
|
||||
*/
|
||||
#define ROC_CN9K_NPA_BULK_ALLOC_MAX_PTRS 30
|
||||
|
||||
/*
|
||||
* Generate 64bit handle to have optimized alloc and free aura operation.
|
||||
* 0 - ROC_AURA_ID_MASK for storing the aura_id.
|
||||
@ -141,6 +146,230 @@ roc_npa_aura_op_available(uint64_t aura_handle)
|
||||
return reg & 0xFFFFFFFFF;
|
||||
}
|
||||
|
||||
static inline void
|
||||
roc_npa_aura_op_bulk_free(uint64_t aura_handle, uint64_t const *buf,
|
||||
unsigned int num, const int fabs)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < num; i++) {
|
||||
const uint64_t inbuf = buf[i];
|
||||
|
||||
roc_npa_aura_op_free(aura_handle, fabs, inbuf);
|
||||
}
|
||||
}
|
||||
|
||||
static inline unsigned int
|
||||
roc_npa_aura_bulk_alloc(uint64_t aura_handle, uint64_t *buf, unsigned int num,
|
||||
const int drop)
|
||||
{
|
||||
#if defined(__aarch64__)
|
||||
uint64_t wdata = roc_npa_aura_handle_to_aura(aura_handle);
|
||||
unsigned int i, count;
|
||||
uint64_t addr;
|
||||
|
||||
if (drop)
|
||||
wdata |= BIT_ULL(63); /* DROP */
|
||||
|
||||
addr = roc_npa_aura_handle_to_base(aura_handle) +
|
||||
NPA_LF_AURA_OP_ALLOCX(0);
|
||||
|
||||
switch (num) {
|
||||
case 30:
|
||||
asm volatile(
|
||||
".cpu generic+lse\n"
|
||||
"mov v18.d[0], %[dst]\n"
|
||||
"mov v18.d[1], %[loc]\n"
|
||||
"mov v19.d[0], %[wdata]\n"
|
||||
"mov v19.d[1], x30\n"
|
||||
"mov v20.d[0], x24\n"
|
||||
"mov v20.d[1], x25\n"
|
||||
"mov v21.d[0], x26\n"
|
||||
"mov v21.d[1], x27\n"
|
||||
"mov v22.d[0], x28\n"
|
||||
"mov v22.d[1], x29\n"
|
||||
"mov x28, v19.d[0]\n"
|
||||
"mov x29, v19.d[0]\n"
|
||||
"mov x30, v18.d[1]\n"
|
||||
"casp x0, x1, x28, x29, [x30]\n"
|
||||
"casp x2, x3, x28, x29, [x30]\n"
|
||||
"casp x4, x5, x28, x29, [x30]\n"
|
||||
"casp x6, x7, x28, x29, [x30]\n"
|
||||
"casp x8, x9, x28, x29, [x30]\n"
|
||||
"casp x10, x11, x28, x29, [x30]\n"
|
||||
"casp x12, x13, x28, x29, [x30]\n"
|
||||
"casp x14, x15, x28, x29, [x30]\n"
|
||||
"casp x16, x17, x28, x29, [x30]\n"
|
||||
"casp x18, x19, x28, x29, [x30]\n"
|
||||
"casp x20, x21, x28, x29, [x30]\n"
|
||||
"casp x22, x23, x28, x29, [x30]\n"
|
||||
"casp x24, x25, x28, x29, [x30]\n"
|
||||
"casp x26, x27, x28, x29, [x30]\n"
|
||||
"casp x28, x29, x28, x29, [x30]\n"
|
||||
"mov x30, v18.d[0]\n"
|
||||
"stp x0, x1, [x30]\n"
|
||||
"stp x2, x3, [x30, #16]\n"
|
||||
"stp x4, x5, [x30, #32]\n"
|
||||
"stp x6, x7, [x30, #48]\n"
|
||||
"stp x8, x9, [x30, #64]\n"
|
||||
"stp x10, x11, [x30, #80]\n"
|
||||
"stp x12, x13, [x30, #96]\n"
|
||||
"stp x14, x15, [x30, #112]\n"
|
||||
"stp x16, x17, [x30, #128]\n"
|
||||
"stp x18, x19, [x30, #144]\n"
|
||||
"stp x20, x21, [x30, #160]\n"
|
||||
"stp x22, x23, [x30, #176]\n"
|
||||
"stp x24, x25, [x30, #192]\n"
|
||||
"stp x26, x27, [x30, #208]\n"
|
||||
"stp x28, x29, [x30, #224]\n"
|
||||
"mov %[dst], v18.d[0]\n"
|
||||
"mov %[loc], v18.d[1]\n"
|
||||
"mov %[wdata], v19.d[0]\n"
|
||||
"mov x30, v19.d[1]\n"
|
||||
"mov x24, v20.d[0]\n"
|
||||
"mov x25, v20.d[1]\n"
|
||||
"mov x26, v21.d[0]\n"
|
||||
"mov x27, v21.d[1]\n"
|
||||
"mov x28, v22.d[0]\n"
|
||||
"mov x29, v22.d[1]\n"
|
||||
:
|
||||
: [wdata] "r"(wdata), [loc] "r"(addr), [dst] "r"(buf)
|
||||
: "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6",
|
||||
"x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14",
|
||||
"x15", "x16", "x17", "x18", "x19", "x20", "x21",
|
||||
"x22", "x23", "v18", "v19", "v20", "v21", "v22");
|
||||
break;
|
||||
case 16:
|
||||
asm volatile(
|
||||
".cpu generic+lse\n"
|
||||
"mov x16, %[wdata]\n"
|
||||
"mov x17, %[wdata]\n"
|
||||
"casp x0, x1, x16, x17, [%[loc]]\n"
|
||||
"casp x2, x3, x16, x17, [%[loc]]\n"
|
||||
"casp x4, x5, x16, x17, [%[loc]]\n"
|
||||
"casp x6, x7, x16, x17, [%[loc]]\n"
|
||||
"casp x8, x9, x16, x17, [%[loc]]\n"
|
||||
"casp x10, x11, x16, x17, [%[loc]]\n"
|
||||
"casp x12, x13, x16, x17, [%[loc]]\n"
|
||||
"casp x14, x15, x16, x17, [%[loc]]\n"
|
||||
"stp x0, x1, [%[dst]]\n"
|
||||
"stp x2, x3, [%[dst], #16]\n"
|
||||
"stp x4, x5, [%[dst], #32]\n"
|
||||
"stp x6, x7, [%[dst], #48]\n"
|
||||
"stp x8, x9, [%[dst], #64]\n"
|
||||
"stp x10, x11, [%[dst], #80]\n"
|
||||
"stp x12, x13, [%[dst], #96]\n"
|
||||
"stp x14, x15, [%[dst], #112]\n"
|
||||
:
|
||||
: [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr)
|
||||
: "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6",
|
||||
"x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14",
|
||||
"x15", "x16", "x17"
|
||||
);
|
||||
break;
|
||||
case 8:
|
||||
asm volatile(
|
||||
".cpu generic+lse\n"
|
||||
"mov x16, %[wdata]\n"
|
||||
"mov x17, %[wdata]\n"
|
||||
"casp x0, x1, x16, x17, [%[loc]]\n"
|
||||
"casp x2, x3, x16, x17, [%[loc]]\n"
|
||||
"casp x4, x5, x16, x17, [%[loc]]\n"
|
||||
"casp x6, x7, x16, x17, [%[loc]]\n"
|
||||
"stp x0, x1, [%[dst]]\n"
|
||||
"stp x2, x3, [%[dst], #16]\n"
|
||||
"stp x4, x5, [%[dst], #32]\n"
|
||||
"stp x6, x7, [%[dst], #48]\n"
|
||||
:
|
||||
: [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr)
|
||||
: "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6",
|
||||
"x7", "x16", "x17"
|
||||
);
|
||||
break;
|
||||
case 4:
|
||||
asm volatile(
|
||||
".cpu generic+lse\n"
|
||||
"mov x16, %[wdata]\n"
|
||||
"mov x17, %[wdata]\n"
|
||||
"casp x0, x1, x16, x17, [%[loc]]\n"
|
||||
"casp x2, x3, x16, x17, [%[loc]]\n"
|
||||
"stp x0, x1, [%[dst]]\n"
|
||||
"stp x2, x3, [%[dst], #16]\n"
|
||||
:
|
||||
: [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr)
|
||||
: "memory", "x0", "x1", "x2", "x3", "x16", "x17"
|
||||
);
|
||||
break;
|
||||
case 2:
|
||||
asm volatile(
|
||||
".cpu generic+lse\n"
|
||||
"mov x16, %[wdata]\n"
|
||||
"mov x17, %[wdata]\n"
|
||||
"casp x0, x1, x16, x17, [%[loc]]\n"
|
||||
"stp x0, x1, [%[dst]]\n"
|
||||
:
|
||||
: [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr)
|
||||
: "memory", "x0", "x1", "x16", "x17"
|
||||
);
|
||||
break;
|
||||
case 1:
|
||||
buf[0] = roc_npa_aura_op_alloc(aura_handle, drop);
|
||||
return !!buf[0];
|
||||
}
|
||||
|
||||
/* Pack the pointers */
|
||||
for (i = 0, count = 0; i < num; i++)
|
||||
if (buf[i])
|
||||
buf[count++] = buf[i];
|
||||
|
||||
return count;
|
||||
#else
|
||||
unsigned int i, count;
|
||||
|
||||
for (i = 0, count = 0; i < num; i++) {
|
||||
buf[count] = roc_npa_aura_op_alloc(aura_handle, drop);
|
||||
if (buf[count])
|
||||
count++;
|
||||
}
|
||||
|
||||
return count;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline unsigned int
|
||||
roc_npa_aura_op_bulk_alloc(uint64_t aura_handle, uint64_t *buf,
|
||||
unsigned int num, const int drop, const int partial)
|
||||
{
|
||||
unsigned int chunk, count, num_alloc;
|
||||
|
||||
count = 0;
|
||||
while (num) {
|
||||
chunk = (num >= ROC_CN9K_NPA_BULK_ALLOC_MAX_PTRS) ?
|
||||
ROC_CN9K_NPA_BULK_ALLOC_MAX_PTRS :
|
||||
plt_align32prevpow2(num);
|
||||
|
||||
num_alloc =
|
||||
roc_npa_aura_bulk_alloc(aura_handle, buf, chunk, drop);
|
||||
|
||||
count += num_alloc;
|
||||
buf += num_alloc;
|
||||
num -= num_alloc;
|
||||
|
||||
if (unlikely(num_alloc != chunk))
|
||||
break;
|
||||
}
|
||||
|
||||
/* If the requested number of pointers was not allocated and if partial
|
||||
* alloc is not desired, then free allocated pointers.
|
||||
*/
|
||||
if (unlikely(num != 0 && !partial)) {
|
||||
roc_npa_aura_op_bulk_free(aura_handle, buf - count, count, 1);
|
||||
count = 0;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
struct roc_npa {
|
||||
struct plt_pci_device *pci_dev;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user