Change movaps to movups in AES-NI code

Currently, the ICP contains accelerated assembly code to be
used specifically on CPUs with AES-NI enabled. This code
makes heavy use of the movaps instruction, which assumes that
it will be provided AES keys that are 16-byte aligned. This
assumption seems to hold on Illumos, but on Linux some kernel
options such as 'slub_debug=P' will violate it. This patch
changes all instances of this instruction to movups, which is
the same except that it can handle unaligned memory.

This patch also adds a few flags that were accidentally never
passed to the assembler; their absence resulted in objtool warnings.

Reviewed-by: Gvozden Neskovic <neskovic@gmail.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Nathaniel R. Lewis <linux.robotdude@gmail.com>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #7065 
Closes #7108
This commit is contained in:
Tom Caputi 2018-01-31 18:17:56 -05:00 committed by Brian Behlendorf
parent f90a30ad1b
commit a73c94934f
3 changed files with 49 additions and 48 deletions

View File

@ -26,6 +26,7 @@ endif
obj-$(CONFIG_ZFS) := $(MODULE).o obj-$(CONFIG_ZFS) := $(MODULE).o
asflags-y := -I$(src)/include asflags-y := -I$(src)/include
asflags-y += $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)
ccflags-y := -I$(src)/include ccflags-y := -I$(src)/include
ccflags-y += $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS) ccflags-y += $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)

View File

@ -207,7 +207,7 @@ _key_expansion_256a_local:
shufps $0b10001100, %xmm0, %xmm4 shufps $0b10001100, %xmm0, %xmm4
pxor %xmm4, %xmm0 pxor %xmm4, %xmm0
pxor %xmm1, %xmm0 pxor %xmm1, %xmm0
movaps %xmm0, (%rcx) movups %xmm0, (%rcx)
add $0x10, %rcx add $0x10, %rcx
ret ret
nop nop
@ -224,18 +224,18 @@ _key_expansion_192a_local:
pxor %xmm4, %xmm0 pxor %xmm4, %xmm0
pxor %xmm1, %xmm0 pxor %xmm1, %xmm0
movaps %xmm2, %xmm5 movups %xmm2, %xmm5
movaps %xmm2, %xmm6 movups %xmm2, %xmm6
pslldq $4, %xmm5 pslldq $4, %xmm5
pshufd $0b11111111, %xmm0, %xmm3 pshufd $0b11111111, %xmm0, %xmm3
pxor %xmm3, %xmm2 pxor %xmm3, %xmm2
pxor %xmm5, %xmm2 pxor %xmm5, %xmm2
movaps %xmm0, %xmm1 movups %xmm0, %xmm1
shufps $0b01000100, %xmm0, %xmm6 shufps $0b01000100, %xmm0, %xmm6
movaps %xmm6, (%rcx) movups %xmm6, (%rcx)
shufps $0b01001110, %xmm2, %xmm1 shufps $0b01001110, %xmm2, %xmm1
movaps %xmm1, 0x10(%rcx) movups %xmm1, 0x10(%rcx)
add $0x20, %rcx add $0x20, %rcx
ret ret
SET_SIZE(_key_expansion_192a) SET_SIZE(_key_expansion_192a)
@ -250,13 +250,13 @@ _key_expansion_192b_local:
pxor %xmm4, %xmm0 pxor %xmm4, %xmm0
pxor %xmm1, %xmm0 pxor %xmm1, %xmm0
movaps %xmm2, %xmm5 movups %xmm2, %xmm5
pslldq $4, %xmm5 pslldq $4, %xmm5
pshufd $0b11111111, %xmm0, %xmm3 pshufd $0b11111111, %xmm0, %xmm3
pxor %xmm3, %xmm2 pxor %xmm3, %xmm2
pxor %xmm5, %xmm2 pxor %xmm5, %xmm2
movaps %xmm0, (%rcx) movups %xmm0, (%rcx)
add $0x10, %rcx add $0x10, %rcx
ret ret
SET_SIZE(_key_expansion_192b) SET_SIZE(_key_expansion_192b)
@ -270,7 +270,7 @@ _key_expansion_256b_local:
shufps $0b10001100, %xmm2, %xmm4 shufps $0b10001100, %xmm2, %xmm4
pxor %xmm4, %xmm2 pxor %xmm4, %xmm2
pxor %xmm1, %xmm2 pxor %xmm1, %xmm2
movaps %xmm2, (%rcx) movups %xmm2, (%rcx)
add $0x10, %rcx add $0x10, %rcx
ret ret
SET_SIZE(_key_expansion_256b) SET_SIZE(_key_expansion_256b)
@ -327,7 +327,7 @@ rijndael_key_setup_enc_intel_local:
jz .Lenc_key_invalid_param jz .Lenc_key_invalid_param
movups (%USERCIPHERKEY), %xmm0 // user key (first 16 bytes) movups (%USERCIPHERKEY), %xmm0 // user key (first 16 bytes)
movaps %xmm0, (%AESKEY) movups %xmm0, (%AESKEY)
lea 0x10(%AESKEY), %rcx // key addr lea 0x10(%AESKEY), %rcx // key addr
pxor %xmm4, %xmm4 // xmm4 is assumed 0 in _key_expansion_x pxor %xmm4, %xmm4 // xmm4 is assumed 0 in _key_expansion_x
@ -341,7 +341,7 @@ rijndael_key_setup_enc_intel_local:
#endif /* OPENSSL_INTERFACE */ #endif /* OPENSSL_INTERFACE */
movups 0x10(%USERCIPHERKEY), %xmm2 // other user key (2nd 16 bytes) movups 0x10(%USERCIPHERKEY), %xmm2 // other user key (2nd 16 bytes)
movaps %xmm2, (%rcx) movups %xmm2, (%rcx)
add $0x10, %rcx add $0x10, %rcx
aeskeygenassist $0x1, %xmm2, %xmm1 // expand the key aeskeygenassist $0x1, %xmm2, %xmm1 // expand the key
@ -525,10 +525,10 @@ FRAME_BEGIN
.align 4 .align 4
.Ldec_key_reorder_loop: .Ldec_key_reorder_loop:
movaps (%AESKEY), %xmm0 movups (%AESKEY), %xmm0
movaps (%ROUNDS64), %xmm1 movups (%ROUNDS64), %xmm1
movaps %xmm0, (%ROUNDS64) movups %xmm0, (%ROUNDS64)
movaps %xmm1, (%AESKEY) movups %xmm1, (%AESKEY)
lea 0x10(%AESKEY), %AESKEY lea 0x10(%AESKEY), %AESKEY
lea -0x10(%ROUNDS64), %ROUNDS64 lea -0x10(%ROUNDS64), %ROUNDS64
cmp %AESKEY, %ROUNDS64 cmp %AESKEY, %ROUNDS64
@ -536,11 +536,11 @@ FRAME_BEGIN
.align 4 .align 4
.Ldec_key_inv_loop: .Ldec_key_inv_loop:
movaps (%rcx), %xmm0 movups (%rcx), %xmm0
// Convert an encryption round key to a form usable for decryption // Convert an encryption round key to a form usable for decryption
// with the "AES Inverse Mix Columns" instruction // with the "AES Inverse Mix Columns" instruction
aesimc %xmm0, %xmm1 aesimc %xmm0, %xmm1
movaps %xmm1, (%rcx) movups %xmm1, (%rcx)
lea 0x10(%rcx), %rcx lea 0x10(%rcx), %rcx
cmp %ENDAESKEY, %rcx cmp %ENDAESKEY, %rcx
jnz .Ldec_key_inv_loop jnz .Ldec_key_inv_loop
@ -602,7 +602,7 @@ FRAME_BEGIN
ENTRY_NP(aes_encrypt_intel) ENTRY_NP(aes_encrypt_intel)
movups (%INP), %STATE // input movups (%INP), %STATE // input
movaps (%KEYP), %KEY // key movups (%KEYP), %KEY // key
#ifdef OPENSSL_INTERFACE #ifdef OPENSSL_INTERFACE
mov 240(%KEYP), %NROUNDS32 // round count mov 240(%KEYP), %NROUNDS32 // round count
#else /* OpenSolaris Interface */ #else /* OpenSolaris Interface */
@ -618,41 +618,41 @@ ENTRY_NP(aes_encrypt_intel)
// AES 256 // AES 256
lea 0x20(%KEYP), %KEYP lea 0x20(%KEYP), %KEYP
movaps -0x60(%KEYP), %KEY movups -0x60(%KEYP), %KEY
aesenc %KEY, %STATE aesenc %KEY, %STATE
movaps -0x50(%KEYP), %KEY movups -0x50(%KEYP), %KEY
aesenc %KEY, %STATE aesenc %KEY, %STATE
.align 4 .align 4
.Lenc192: .Lenc192:
// AES 192 and 256 // AES 192 and 256
movaps -0x40(%KEYP), %KEY movups -0x40(%KEYP), %KEY
aesenc %KEY, %STATE aesenc %KEY, %STATE
movaps -0x30(%KEYP), %KEY movups -0x30(%KEYP), %KEY
aesenc %KEY, %STATE aesenc %KEY, %STATE
.align 4 .align 4
.Lenc128: .Lenc128:
// AES 128, 192, and 256 // AES 128, 192, and 256
movaps -0x20(%KEYP), %KEY movups -0x20(%KEYP), %KEY
aesenc %KEY, %STATE aesenc %KEY, %STATE
movaps -0x10(%KEYP), %KEY movups -0x10(%KEYP), %KEY
aesenc %KEY, %STATE aesenc %KEY, %STATE
movaps (%KEYP), %KEY movups (%KEYP), %KEY
aesenc %KEY, %STATE aesenc %KEY, %STATE
movaps 0x10(%KEYP), %KEY movups 0x10(%KEYP), %KEY
aesenc %KEY, %STATE aesenc %KEY, %STATE
movaps 0x20(%KEYP), %KEY movups 0x20(%KEYP), %KEY
aesenc %KEY, %STATE aesenc %KEY, %STATE
movaps 0x30(%KEYP), %KEY movups 0x30(%KEYP), %KEY
aesenc %KEY, %STATE aesenc %KEY, %STATE
movaps 0x40(%KEYP), %KEY movups 0x40(%KEYP), %KEY
aesenc %KEY, %STATE aesenc %KEY, %STATE
movaps 0x50(%KEYP), %KEY movups 0x50(%KEYP), %KEY
aesenc %KEY, %STATE aesenc %KEY, %STATE
movaps 0x60(%KEYP), %KEY movups 0x60(%KEYP), %KEY
aesenc %KEY, %STATE aesenc %KEY, %STATE
movaps 0x70(%KEYP), %KEY movups 0x70(%KEYP), %KEY
aesenclast %KEY, %STATE // last round aesenclast %KEY, %STATE // last round
movups %STATE, (%OUTP) // output movups %STATE, (%OUTP) // output
@ -685,7 +685,7 @@ ENTRY_NP(aes_encrypt_intel)
ENTRY_NP(aes_decrypt_intel) ENTRY_NP(aes_decrypt_intel)
movups (%INP), %STATE // input movups (%INP), %STATE // input
movaps (%KEYP), %KEY // key movups (%KEYP), %KEY // key
#ifdef OPENSSL_INTERFACE #ifdef OPENSSL_INTERFACE
mov 240(%KEYP), %NROUNDS32 // round count mov 240(%KEYP), %NROUNDS32 // round count
#else /* OpenSolaris Interface */ #else /* OpenSolaris Interface */
@ -701,41 +701,41 @@ ENTRY_NP(aes_decrypt_intel)
// AES 256 // AES 256
lea 0x20(%KEYP), %KEYP lea 0x20(%KEYP), %KEYP
movaps -0x60(%KEYP), %KEY movups -0x60(%KEYP), %KEY
aesdec %KEY, %STATE aesdec %KEY, %STATE
movaps -0x50(%KEYP), %KEY movups -0x50(%KEYP), %KEY
aesdec %KEY, %STATE aesdec %KEY, %STATE
.align 4 .align 4
.Ldec192: .Ldec192:
// AES 192 and 256 // AES 192 and 256
movaps -0x40(%KEYP), %KEY movups -0x40(%KEYP), %KEY
aesdec %KEY, %STATE aesdec %KEY, %STATE
movaps -0x30(%KEYP), %KEY movups -0x30(%KEYP), %KEY
aesdec %KEY, %STATE aesdec %KEY, %STATE
.align 4 .align 4
.Ldec128: .Ldec128:
// AES 128, 192, and 256 // AES 128, 192, and 256
movaps -0x20(%KEYP), %KEY movups -0x20(%KEYP), %KEY
aesdec %KEY, %STATE aesdec %KEY, %STATE
movaps -0x10(%KEYP), %KEY movups -0x10(%KEYP), %KEY
aesdec %KEY, %STATE aesdec %KEY, %STATE
movaps (%KEYP), %KEY movups (%KEYP), %KEY
aesdec %KEY, %STATE aesdec %KEY, %STATE
movaps 0x10(%KEYP), %KEY movups 0x10(%KEYP), %KEY
aesdec %KEY, %STATE aesdec %KEY, %STATE
movaps 0x20(%KEYP), %KEY movups 0x20(%KEYP), %KEY
aesdec %KEY, %STATE aesdec %KEY, %STATE
movaps 0x30(%KEYP), %KEY movups 0x30(%KEYP), %KEY
aesdec %KEY, %STATE aesdec %KEY, %STATE
movaps 0x40(%KEYP), %KEY movups 0x40(%KEYP), %KEY
aesdec %KEY, %STATE aesdec %KEY, %STATE
movaps 0x50(%KEYP), %KEY movups 0x50(%KEYP), %KEY
aesdec %KEY, %STATE aesdec %KEY, %STATE
movaps 0x60(%KEYP), %KEY movups 0x60(%KEYP), %KEY
aesdec %KEY, %STATE aesdec %KEY, %STATE
movaps 0x70(%KEYP), %KEY movups 0x70(%KEYP), %KEY
aesdeclast %KEY, %STATE // last round aesdeclast %KEY, %STATE // last round
movups %STATE, (%OUTP) // output movups %STATE, (%OUTP) // output

View File

@ -150,7 +150,7 @@ ENTRY_NP(gcm_mul_pclmulqdq)
// Byte swap 16-byte input // Byte swap 16-byte input
// //
lea .Lbyte_swap16_mask(%rip), %rax lea .Lbyte_swap16_mask(%rip), %rax
movaps (%rax), %xmm10 movups (%rax), %xmm10
pshufb %xmm10, %xmm0 pshufb %xmm10, %xmm0
pshufb %xmm10, %xmm1 pshufb %xmm10, %xmm1