Vectorized fletcher_4 must be 128-bit aligned
The fletcher_4_native() and fletcher_4_byteswap() functions may only safely use the vectorized implementations when the buffer is 128-bit aligned. This is because both the AVX2 and SSE implementations process four 32-bit words per iteration. Fall back to the scalar implementation, which only processes a single 32-bit word, for unaligned buffers. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Gvozden Neskovic <neskovic@gmail.com> Issue #4330
This commit is contained in:
parent
d1d19c7854
commit
0dab2e84fc
@ -334,7 +334,12 @@ fletcher_4_impl_get(void)
|
||||
void
|
||||
fletcher_4_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
|
||||
{
|
||||
const fletcher_4_ops_t *ops = fletcher_4_impl_get();
|
||||
const fletcher_4_ops_t *ops;
|
||||
|
||||
if (IS_P2ALIGNED(size, 4 * sizeof (uint32_t)))
|
||||
ops = fletcher_4_impl_get();
|
||||
else
|
||||
ops = &fletcher_4_scalar_ops;
|
||||
|
||||
ops->init(zcp);
|
||||
ops->compute(buf, size, zcp);
|
||||
@ -345,7 +350,12 @@ fletcher_4_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
|
||||
void
|
||||
fletcher_4_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp)
|
||||
{
|
||||
const fletcher_4_ops_t *ops = fletcher_4_impl_get();
|
||||
const fletcher_4_ops_t *ops;
|
||||
|
||||
if (IS_P2ALIGNED(size, 4 * sizeof (uint32_t)))
|
||||
ops = fletcher_4_impl_get();
|
||||
else
|
||||
ops = &fletcher_4_scalar_ops;
|
||||
|
||||
ops->init(zcp);
|
||||
ops->compute_byteswap(buf, size, zcp);
|
||||
|
Loading…
Reference in New Issue
Block a user