From a73c94934f6176f63c3ec4c216a84066e9b65465 Mon Sep 17 00:00:00 2001 From: Tom Caputi Date: Wed, 31 Jan 2018 18:17:56 -0500 Subject: [PATCH] Change movaps to movups in AES-NI code Currently, the ICP contains accelerated assembly code to be used specifically on CPUs with AES-NI enabled. This code makes heavy use of the movaps instruction which assumes that it will be provided aes keys that are 16 byte aligned. This assumption seems to hold on Illumos, but on Linux some kernel options such as 'slub_debug=P' will violate it. This patch changes all instances of this instruction to movups which is the same except that it can handle unaligned memory. This patch also adds a few flags which were accidentally never given to the assembly compiler, resulting in objtool warnings. Reviewed by: Gvozden Neskovic Reviewed-by: Brian Behlendorf Reviewed-by: Nathaniel R. Lewis Signed-off-by: Tom Caputi Closes #7065 Closes #7108 --- module/icp/Makefile.in | 1 + module/icp/asm-x86_64/aes/aes_intel.S | 94 ++++++++++++------------- module/icp/asm-x86_64/modes/gcm_intel.S | 2 +- 3 files changed, 49 insertions(+), 48 deletions(-) diff --git a/module/icp/Makefile.in b/module/icp/Makefile.in index 2eb9e6f1f7cd..cfe913e1d015 100644 --- a/module/icp/Makefile.in +++ b/module/icp/Makefile.in @@ -26,6 +26,7 @@ endif obj-$(CONFIG_ZFS) := $(MODULE).o asflags-y := -I$(src)/include +asflags-y += $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS) ccflags-y := -I$(src)/include ccflags-y += $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS) diff --git a/module/icp/asm-x86_64/aes/aes_intel.S b/module/icp/asm-x86_64/aes/aes_intel.S index ed0df75c5513..a40e30fbed5f 100644 --- a/module/icp/asm-x86_64/aes/aes_intel.S +++ b/module/icp/asm-x86_64/aes/aes_intel.S @@ -207,7 +207,7 @@ _key_expansion_256a_local: shufps $0b10001100, %xmm0, %xmm4 pxor %xmm4, %xmm0 pxor %xmm1, %xmm0 - movaps %xmm0, (%rcx) + movups %xmm0, (%rcx) add $0x10, %rcx ret nop @@ -224,18 +224,18 @@ _key_expansion_192a_local: pxor %xmm4, %xmm0 pxor %xmm1, %xmm0 - movaps %xmm2, %xmm5 - movaps %xmm2, %xmm6 + movups %xmm2, %xmm5 + movups %xmm2, %xmm6 pslldq $4, %xmm5 pshufd $0b11111111, %xmm0, %xmm3 pxor %xmm3, %xmm2 pxor %xmm5, %xmm2 - movaps %xmm0, %xmm1 + movups %xmm0, %xmm1 shufps $0b01000100, %xmm0, %xmm6 - movaps %xmm6, (%rcx) + movups %xmm6, (%rcx) shufps $0b01001110, %xmm2, %xmm1 - movaps %xmm1, 0x10(%rcx) + movups %xmm1, 0x10(%rcx) add $0x20, %rcx ret SET_SIZE(_key_expansion_192a) @@ -250,13 +250,13 @@ _key_expansion_192b_local: pxor %xmm4, %xmm0 pxor %xmm1, %xmm0 - movaps %xmm2, %xmm5 + movups %xmm2, %xmm5 pslldq $4, %xmm5 pshufd $0b11111111, %xmm0, %xmm3 pxor %xmm3, %xmm2 pxor %xmm5, %xmm2 - movaps %xmm0, (%rcx) + movups %xmm0, (%rcx) add $0x10, %rcx ret SET_SIZE(_key_expansion_192b) @@ -270,7 +270,7 @@ _key_expansion_256b_local: shufps $0b10001100, %xmm2, %xmm4 pxor %xmm4, %xmm2 pxor %xmm1, %xmm2 - movaps %xmm2, (%rcx) + movups %xmm2, (%rcx) add $0x10, %rcx ret SET_SIZE(_key_expansion_256b) @@ -327,7 +327,7 @@ rijndael_key_setup_enc_intel_local: jz .Lenc_key_invalid_param movups (%USERCIPHERKEY), %xmm0 // user key (first 16 bytes) - movaps %xmm0, (%AESKEY) + movups %xmm0, (%AESKEY) lea 0x10(%AESKEY), %rcx // key addr pxor %xmm4, %xmm4 // xmm4 is assumed 0 in _key_expansion_x @@ -341,7 +341,7 @@ rijndael_key_setup_enc_intel_local: #endif /* OPENSSL_INTERFACE */ movups 0x10(%USERCIPHERKEY), %xmm2 // other user key (2nd 16 bytes) - movaps %xmm2, (%rcx) + movups %xmm2, (%rcx) add $0x10, %rcx aeskeygenassist $0x1, %xmm2, %xmm1 // expand the key @@ -525,10 +525,10 @@ FRAME_BEGIN .align 4 .Ldec_key_reorder_loop: - movaps (%AESKEY), %xmm0 - movaps (%ROUNDS64), %xmm1 - movaps %xmm0, (%ROUNDS64) - movaps %xmm1, (%AESKEY) + movups (%AESKEY), %xmm0 + movups (%ROUNDS64), %xmm1 + movups %xmm0, (%ROUNDS64) + movups %xmm1, (%AESKEY) lea 0x10(%AESKEY), %AESKEY lea -0x10(%ROUNDS64), %ROUNDS64 cmp %AESKEY, %ROUNDS64 @@ -536,11 +536,11 @@ FRAME_BEGIN .align 4 .Ldec_key_inv_loop: - movaps (%rcx), %xmm0 + movups (%rcx), %xmm0 // Convert an encryption round key to a form usable for decryption // with the "AES Inverse Mix Columns" instruction aesimc %xmm0, %xmm1 - movaps %xmm1, (%rcx) + movups %xmm1, (%rcx) lea 0x10(%rcx), %rcx cmp %ENDAESKEY, %rcx jnz .Ldec_key_inv_loop @@ -602,7 +602,7 @@ FRAME_BEGIN ENTRY_NP(aes_encrypt_intel) movups (%INP), %STATE // input - movaps (%KEYP), %KEY // key + movups (%KEYP), %KEY // key #ifdef OPENSSL_INTERFACE mov 240(%KEYP), %NROUNDS32 // round count #else /* OpenSolaris Interface */ @@ -618,41 +618,41 @@ ENTRY_NP(aes_encrypt_intel) // AES 256 lea 0x20(%KEYP), %KEYP - movaps -0x60(%KEYP), %KEY + movups -0x60(%KEYP), %KEY aesenc %KEY, %STATE - movaps -0x50(%KEYP), %KEY + movups -0x50(%KEYP), %KEY aesenc %KEY, %STATE .align 4 .Lenc192: // AES 192 and 256 - movaps -0x40(%KEYP), %KEY + movups -0x40(%KEYP), %KEY aesenc %KEY, %STATE - movaps -0x30(%KEYP), %KEY + movups -0x30(%KEYP), %KEY aesenc %KEY, %STATE .align 4 .Lenc128: // AES 128, 192, and 256 - movaps -0x20(%KEYP), %KEY + movups -0x20(%KEYP), %KEY aesenc %KEY, %STATE - movaps -0x10(%KEYP), %KEY + movups -0x10(%KEYP), %KEY aesenc %KEY, %STATE - movaps (%KEYP), %KEY + movups (%KEYP), %KEY aesenc %KEY, %STATE - movaps 0x10(%KEYP), %KEY + movups 0x10(%KEYP), %KEY aesenc %KEY, %STATE - movaps 0x20(%KEYP), %KEY + movups 0x20(%KEYP), %KEY aesenc %KEY, %STATE - movaps 0x30(%KEYP), %KEY + movups 0x30(%KEYP), %KEY aesenc %KEY, %STATE - movaps 0x40(%KEYP), %KEY + movups 0x40(%KEYP), %KEY aesenc %KEY, %STATE - movaps 0x50(%KEYP), %KEY + movups 0x50(%KEYP), %KEY aesenc %KEY, %STATE - movaps 0x60(%KEYP), %KEY + movups 0x60(%KEYP), %KEY aesenc %KEY, %STATE - movaps 0x70(%KEYP), %KEY + movups 0x70(%KEYP), %KEY aesenclast %KEY, %STATE // last round movups %STATE, (%OUTP) // output @@ -685,7 +685,7 @@ ENTRY_NP(aes_encrypt_intel) ENTRY_NP(aes_decrypt_intel) movups (%INP), %STATE // input - movaps (%KEYP), %KEY // key + movups (%KEYP), %KEY // key #ifdef OPENSSL_INTERFACE mov 240(%KEYP), %NROUNDS32 // round count #else /* OpenSolaris Interface */ @@ -701,41 +701,41 @@ ENTRY_NP(aes_decrypt_intel) // AES 256 lea 0x20(%KEYP), %KEYP - movaps -0x60(%KEYP), %KEY + movups -0x60(%KEYP), %KEY aesdec %KEY, %STATE - movaps -0x50(%KEYP), %KEY + movups -0x50(%KEYP), %KEY aesdec %KEY, %STATE .align 4 .Ldec192: // AES 192 and 256 - movaps -0x40(%KEYP), %KEY + movups -0x40(%KEYP), %KEY aesdec %KEY, %STATE - movaps -0x30(%KEYP), %KEY + movups -0x30(%KEYP), %KEY aesdec %KEY, %STATE .align 4 .Ldec128: // AES 128, 192, and 256 - movaps -0x20(%KEYP), %KEY + movups -0x20(%KEYP), %KEY aesdec %KEY, %STATE - movaps -0x10(%KEYP), %KEY + movups -0x10(%KEYP), %KEY aesdec %KEY, %STATE - movaps (%KEYP), %KEY + movups (%KEYP), %KEY aesdec %KEY, %STATE - movaps 0x10(%KEYP), %KEY + movups 0x10(%KEYP), %KEY aesdec %KEY, %STATE - movaps 0x20(%KEYP), %KEY + movups 0x20(%KEYP), %KEY aesdec %KEY, %STATE - movaps 0x30(%KEYP), %KEY + movups 0x30(%KEYP), %KEY aesdec %KEY, %STATE - movaps 0x40(%KEYP), %KEY + movups 0x40(%KEYP), %KEY aesdec %KEY, %STATE - movaps 0x50(%KEYP), %KEY + movups 0x50(%KEYP), %KEY aesdec %KEY, %STATE - movaps 0x60(%KEYP), %KEY + movups 0x60(%KEYP), %KEY aesdec %KEY, %STATE - movaps 0x70(%KEYP), %KEY + movups 0x70(%KEYP), %KEY aesdeclast %KEY, %STATE // last round movups %STATE, (%OUTP) // output diff --git a/module/icp/asm-x86_64/modes/gcm_intel.S b/module/icp/asm-x86_64/modes/gcm_intel.S index a43b5ebcb7e5..3aec0ee1586d 100644 --- a/module/icp/asm-x86_64/modes/gcm_intel.S +++ b/module/icp/asm-x86_64/modes/gcm_intel.S @@ -150,7 +150,7 @@ ENTRY_NP(gcm_mul_pclmulqdq) // Byte swap 16-byte input // lea .Lbyte_swap16_mask(%rip), %rax - movaps (%rax), %xmm10 + movups (%rax), %xmm10 pshufb %xmm10, %xmm0 pshufb %xmm10, %xmm1