freebsd-nq/config/toolchain-simd.m4
Attila Fülöp 31b160f0a6
ICP: Improve AES-GCM performance
Currently SIMD accelerated AES-GCM performance is limited by two
factors:

a. The need to disable preemption and interrupts and save the FPU
state before using it and to do the reverse when done. Due to the
way the code is organized (see (b) below) we have to pay this price
twice for each 16 byte GCM block processed.

b. Most processing is done in C, operating on single GCM blocks.
The use of SIMD instructions is limited to the AES encryption of the
counter block (AES-NI) and the Galois multiplication (PCLMULQDQ).
This leads to the FPU not being fully utilized for crypto
operations.

To solve (a) we do crypto processing in larger chunks while owning
the FPU. An `icp_gcm_avx_chunk_size` module parameter was introduced
to make this chunk size tweakable. It defaults to 32 KiB. This step
alone roughly doubles performance. (b) is tackled by porting and
using the highly optimized openssl AES-GCM assembler routines, which
do all the processing (CTR, AES, GMULT) in a single routine. Both
steps together result in up to 32x reduction of the time spend in
the en/decryption routines, leading up to approximately 12x
throughput increase for large (128 KiB) blocks.

Lastly, this commit changes the default encryption algorithm from
AES-CCM to AES-GCM when setting the `encryption=on` property.

Reviewed-By: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-By: Jason King <jason.king@joyent.com>
Reviewed-By: Tom Caputi <tcaputi@datto.com>
Reviewed-By: Richard Laager <rlaager@wiktel.com>
Signed-off-by: Attila Fülöp <attila@fueloep.org>
Closes 
2020-02-10 12:59:50 -08:00

425 lines
9.4 KiB
Plaintext

dnl #
dnl # Checks if host toolchain supports SIMD instructions
dnl #
AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN_SIMD], [
case "$host_cpu" in
x86_64 | x86 | i686)
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE2
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE3
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSSE3
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_1
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_2
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX2
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512F
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512CD
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512DQ
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512BW
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512IFMA
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VBMI
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512PF
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512ER
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VL
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AES
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_PCLMULQDQ
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE
;;
esac
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE], [
AC_MSG_CHECKING([whether host toolchain supports SSE])
AC_LINK_IFELSE([AC_LANG_SOURCE([[
void main()
{
__asm__ __volatile__("xorps %xmm0, %xmm1");
}
]])], [
AC_DEFINE([HAVE_SSE], 1, [Define if host toolchain supports SSE])
AC_MSG_RESULT([yes])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE2
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE2], [
AC_MSG_CHECKING([whether host toolchain supports SSE2])
AC_LINK_IFELSE([AC_LANG_SOURCE([[
void main()
{
__asm__ __volatile__("pxor %xmm0, %xmm1");
}
]])], [
AC_DEFINE([HAVE_SSE2], 1, [Define if host toolchain supports SSE2])
AC_MSG_RESULT([yes])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE3
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE3], [
AC_MSG_CHECKING([whether host toolchain supports SSE3])
AC_LINK_IFELSE([AC_LANG_SOURCE([[
void main()
{
char v[16];
__asm__ __volatile__("lddqu %0,%%xmm0" :: "m"(v[0]));
}
]])], [
AC_DEFINE([HAVE_SSE3], 1, [Define if host toolchain supports SSE3])
AC_MSG_RESULT([yes])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSSE3
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSSE3], [
AC_MSG_CHECKING([whether host toolchain supports SSSE3])
AC_LINK_IFELSE([AC_LANG_SOURCE([[
void main()
{
__asm__ __volatile__("pshufb %xmm0,%xmm1");
}
]])], [
AC_DEFINE([HAVE_SSSE3], 1, [Define if host toolchain supports SSSE3])
AC_MSG_RESULT([yes])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_1
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_1], [
AC_MSG_CHECKING([whether host toolchain supports SSE4.1])
AC_LINK_IFELSE([AC_LANG_SOURCE([[
void main()
{
__asm__ __volatile__("pmaxsb %xmm0,%xmm1");
}
]])], [
AC_DEFINE([HAVE_SSE4_1], 1, [Define if host toolchain supports SSE4.1])
AC_MSG_RESULT([yes])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_2
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_2], [
AC_MSG_CHECKING([whether host toolchain supports SSE4.2])
AC_LINK_IFELSE([AC_LANG_SOURCE([[
void main()
{
__asm__ __volatile__("pcmpgtq %xmm0, %xmm1");
}
]])], [
AC_DEFINE([HAVE_SSE4_2], 1, [Define if host toolchain supports SSE4.2])
AC_MSG_RESULT([yes])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX], [
AC_MSG_CHECKING([whether host toolchain supports AVX])
AC_LINK_IFELSE([AC_LANG_SOURCE([[
void main()
{
char v[32];
__asm__ __volatile__("vmovdqa %0,%%ymm0" :: "m"(v[0]));
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_AVX], 1, [Define if host toolchain supports AVX])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX2
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX2], [
AC_MSG_CHECKING([whether host toolchain supports AVX2])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
void main()
{
__asm__ __volatile__("vpshufb %ymm0,%ymm1,%ymm2");
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_AVX2], 1, [Define if host toolchain supports AVX2])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512F
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512F], [
AC_MSG_CHECKING([whether host toolchain supports AVX512F])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
void main()
{
__asm__ __volatile__("vpandd %zmm0,%zmm1,%zmm2");
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_AVX512F], 1, [Define if host toolchain supports AVX512F])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512CD
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512CD], [
AC_MSG_CHECKING([whether host toolchain supports AVX512CD])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
void main()
{
__asm__ __volatile__("vplzcntd %zmm0,%zmm1");
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_AVX512CD], 1, [Define if host toolchain supports AVX512CD])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512DQ
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512DQ], [
AC_MSG_CHECKING([whether host toolchain supports AVX512DQ])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
void main()
{
__asm__ __volatile__("vandpd %zmm0,%zmm1,%zmm2");
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_AVX512DQ], 1, [Define if host toolchain supports AVX512DQ])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512BW
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512BW], [
AC_MSG_CHECKING([whether host toolchain supports AVX512BW])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
void main()
{
__asm__ __volatile__("vpshufb %zmm0,%zmm1,%zmm2");
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_AVX512BW], 1, [Define if host toolchain supports AVX512BW])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512IFMA
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512IFMA], [
AC_MSG_CHECKING([whether host toolchain supports AVX512IFMA])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
void main()
{
__asm__ __volatile__("vpmadd52luq %zmm0,%zmm1,%zmm2");
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_AVX512IFMA], 1, [Define if host toolchain supports AVX512IFMA])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VBMI
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VBMI], [
AC_MSG_CHECKING([whether host toolchain supports AVX512VBMI])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
void main()
{
__asm__ __volatile__("vpermb %zmm0,%zmm1,%zmm2");
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_AVX512VBMI], 1, [Define if host toolchain supports AVX512VBMI])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512PF
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512PF], [
AC_MSG_CHECKING([whether host toolchain supports AVX512PF])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
void main()
{
__asm__ __volatile__("vgatherpf0dps (%rsi,%zmm0,4){%k1}");
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_AVX512PF], 1, [Define if host toolchain supports AVX512PF])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512ER
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512ER], [
AC_MSG_CHECKING([whether host toolchain supports AVX512ER])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
void main()
{
__asm__ __volatile__("vexp2pd %zmm0,%zmm1");
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_AVX512ER], 1, [Define if host toolchain supports AVX512ER])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VL
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VL], [
AC_MSG_CHECKING([whether host toolchain supports AVX512VL])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
void main()
{
__asm__ __volatile__("vpabsq %zmm0,%zmm1");
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_AVX512VL], 1, [Define if host toolchain supports AVX512VL])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AES
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AES], [
AC_MSG_CHECKING([whether host toolchain supports AES])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
void main()
{
__asm__ __volatile__("aesenc %xmm0, %xmm1");
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_AES], 1, [Define if host toolchain supports AES])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_PCLMULQDQ
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_PCLMULQDQ], [
AC_MSG_CHECKING([whether host toolchain supports PCLMULQDQ])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
void main()
{
__asm__ __volatile__("pclmulqdq %0, %%xmm0, %%xmm1" :: "i"(0));
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_PCLMULQDQ], 1, [Define if host toolchain supports PCLMULQDQ])
], [
AC_MSG_RESULT([no])
])
])
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE], [
AC_MSG_CHECKING([whether host toolchain supports MOVBE])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
void main()
{
__asm__ __volatile__("movbe 0(%eax), %eax");
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_MOVBE], 1, [Define if host toolchain supports MOVBE])
], [
AC_MSG_RESULT([no])
])
])