fc897b24b2
- Benchmark memory block is increased to 128kiB to reflect real block sizes more accurately. Measurements include all three stages needed for checksum generation, i.e. `init()/compute()/fini()`. The inner loop is repeated multiple times to offset the overhead of the time function.
- The fastest implementation selects native and byteswap methods independently in the benchmark. To support this, new function pointers `init_byteswap()/fini_byteswap()` are introduced.
- The implementation mutex lock is replaced by an atomic variable.
- To save time, the benchmark is not executed in userspace. Instead, the highest supported implementation is used for fastest. The default userspace selector is still 'cycle'.
- `fletcher_4_native/byteswap()` methods use incremental methods to finish the calculation if the data size is not a multiple of the vector stride (currently 64B).
- Added `fletcher_4_native_varsize()` special purpose method for use when the buffer size is not known in advance. The method does not enforce 4B alignment on the buffer size, and will ignore the last (size % 4) bytes of the data buffer.
- Benchmark `kstat` is changed to match the one of vdev_raidz. It now shows throughput for all supported implementations (in B/s), native and byteswap, as well as the code [fastest] is running.

Example of `fletcher_4_bench` running on `Intel(R) Xeon(R) CPU E5-2660 v3 @ 2.60GHz`:

implementation   native         byteswap
scalar           4768120823     3426105750
sse2             7947841777     4318964249
ssse3            7951922722     6112191941
avx2             13269714358    11043200912
fastest          avx2           avx2

Example of `fletcher_4_bench` running on `Intel(R) Xeon Phi(TM) CPU 7210 @ 1.30GHz`:

implementation   native         byteswap
scalar           1291115967     1031555336
sse2             2539571138     1280970926
ssse3            2537778746     1080016762
avx2             4950749767     1078493449
avx512f          9581379998     4010029046
fastest          avx512f        avx512f

Signed-off-by: Gvozden Neskovic <neskovic@gmail.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #4952
101 lines
3.1 KiB
C
101 lines
3.1 KiB
C
/*
|
|
* CDDL HEADER START
|
|
*
|
|
* The contents of this file are subject to the terms of the
|
|
* Common Development and Distribution License (the "License").
|
|
* You may not use this file except in compliance with the License.
|
|
*
|
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
* or http://www.opensolaris.org/os/licensing.
|
|
* See the License for the specific language governing permissions
|
|
* and limitations under the License.
|
|
*
|
|
* When distributing Covered Code, include this CDDL HEADER in each
|
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
* If applicable, add the following below this CDDL HEADER, with the
|
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
*
|
|
* CDDL HEADER END
|
|
*/
|
|
/*
|
|
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
|
* Use is subject to license terms.
|
|
*/
|
|
|
|
#ifndef _ZFS_FLETCHER_H
|
|
#define _ZFS_FLETCHER_H
|
|
|
|
#include <sys/types.h>
|
|
#include <sys/spa_checksum.h>
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
/*
 * fletcher checksum functions
 *
 * Note: Fletcher checksum methods expect the buffer size to be 4B aligned.
 * This limitation stems from the algorithm design. Performing an incremental
 * checksum without said alignment would yield different results. Therefore,
 * the code includes assertions for the size alignment.
 * For compatibility, it is required that some code paths calculate the
 * checksum of non-aligned buffer sizes. For this purpose, the
 * `fletcher_4_native_varsize()` checksum method is added. This method will
 * ignore the last (size % 4) bytes of the data buffer.
 */
|
|
/*
 * Parameter convention shared by the checksum routines declared here
 * (the names are documentation only and do not affect the ABI):
 *
 *   buf  - data buffer to checksum; never written through this pointer
 *   size - length of buf in bytes
 *   zcp  - checksum value. NOTE(review): presumably overwritten by the
 *          one-shot routines and updated in place by the incremental
 *          ones -- confirm against the implementation.
 */

/* Fletcher-2: native byte order and byte-swapped input variants. */
void fletcher_2_native(const void *buf, uint64_t size, zio_cksum_t *zcp);
void fletcher_2_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp);

/*
 * Fletcher-4: native byte order and byte-swapped input variants.
 * fletcher_4_native_varsize() is the form for buffers whose size is not a
 * multiple of 4; it ignores the last (size % 4) bytes of buf.
 */
void fletcher_4_native(const void *buf, uint64_t size, zio_cksum_t *zcp);
void fletcher_4_native_varsize(const void *buf, uint64_t size,
    zio_cksum_t *zcp);
void fletcher_4_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp);

/* Incremental Fletcher-4 variants, native and byte-swapped. */
void fletcher_4_incremental_native(const void *buf, uint64_t size,
    zio_cksum_t *zcp);
void fletcher_4_incremental_byteswap(const void *buf, uint64_t size,
    zio_cksum_t *zcp);

/*
 * Fletcher-4 implementation selection and setup/teardown.
 *
 * NOTE(review): fletcher_4_impl_set() presumably chooses the implementation
 * named by `selector` and returns 0 on success or an error code otherwise;
 * fletcher_4_init()/fletcher_4_fini() presumably bracket the lifetime of
 * the selection state. Neither is visible from this header -- confirm.
 */
int fletcher_4_impl_set(const char *selector);
void fletcher_4_init(void);
void fletcher_4_fini(void);
|
|
|
|
|
|
/*
|
|
* fletcher checksum struct
|
|
*/
|
|
typedef void (*fletcher_4_init_f)(zio_cksum_t *);
|
|
typedef void (*fletcher_4_fini_f)(zio_cksum_t *);
|
|
typedef void (*fletcher_4_compute_f)(const void *, uint64_t, zio_cksum_t *);
|
|
|
|
typedef struct fletcher_4_func {
|
|
fletcher_4_init_f init_native;
|
|
fletcher_4_fini_f fini_native;
|
|
fletcher_4_compute_f compute_native;
|
|
fletcher_4_init_f init_byteswap;
|
|
fletcher_4_fini_f fini_byteswap;
|
|
fletcher_4_compute_f compute_byteswap;
|
|
boolean_t (*valid)(void);
|
|
const char *name;
|
|
} fletcher_4_ops_t;
|
|
|
|
/*
 * Ops tables of the optional vectorized implementations. Each declaration
 * is visible only when the build defines the matching feature macros; the
 * objects themselves are defined outside this header.
 */
#ifdef HAVE_SSE2
extern const fletcher_4_ops_t fletcher_4_sse2_ops;
#endif /* HAVE_SSE2 */

#if defined(HAVE_SSE2) && defined(HAVE_SSSE3)
extern const fletcher_4_ops_t fletcher_4_ssse3_ops;
#endif /* HAVE_SSE2 && HAVE_SSSE3 */

#if defined(HAVE_AVX) && defined(HAVE_AVX2)
extern const fletcher_4_ops_t fletcher_4_avx2_ops;
#endif /* HAVE_AVX && HAVE_AVX2 */

#if defined(__x86_64) && defined(HAVE_AVX512F)
extern const fletcher_4_ops_t fletcher_4_avx512f_ops;
#endif /* __x86_64 && HAVE_AVX512F */
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif /* _ZFS_FLETCHER_H */
|