eal/x86: add 256 bytes copy for AVX2

The rte_mov256 function was missing for AVX2.

Fixes: 9144d6bcde ("eal/x86: optimize memcpy for SSE and AVX")

Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
This commit is contained in:
Morten Brørup 2022-08-20 12:30:32 +02:00 committed by David Marchand
parent 40abb903fe
commit a9cfccbb03

View File

@ -371,6 +371,23 @@ rte_mov128(uint8_t *dst, const uint8_t *src)
rte_mov32((uint8_t *)dst + 3 * 32, (const uint8_t *)src + 3 * 32);
}
/**
 * Copy a 256-byte block from src to dst.
 * The source and destination regions should not overlap.
 */
static __rte_always_inline void
rte_mov256(uint8_t *dst, const uint8_t *src)
{
	/* Eight back-to-back rte_mov32 calls, one per 32-byte slice. */
	rte_mov32(dst, src);
	rte_mov32(dst + 32, src + 32);
	rte_mov32(dst + 64, src + 64);
	rte_mov32(dst + 96, src + 96);
	rte_mov32(dst + 128, src + 128);
	rte_mov32(dst + 160, src + 160);
	rte_mov32(dst + 192, src + 192);
	rte_mov32(dst + 224, src + 224);
}
/**
* Copy 128-byte blocks from one location to another,
* locations should not overlap.