Regen amd64 assembly files for OpenSSL 1.1.1.

This commit is contained in:
jkim 2018-09-13 21:07:09 +00:00
parent 8ea5e5a891
commit bd1b5d2a7f
35 changed files with 14949 additions and 524 deletions

View File

@ -49,12 +49,15 @@ sha256-armv8.S: sha512-armv8.pl
${LCRYPTO_SRC}/crypto/aes/asm \
${LCRYPTO_SRC}/crypto/bn/asm \
${LCRYPTO_SRC}/crypto/camellia/asm \
${LCRYPTO_SRC}/crypto/chacha/asm \
${LCRYPTO_SRC}/crypto/ec/asm \
${LCRYPTO_SRC}/crypto/md5/asm \
${LCRYPTO_SRC}/crypto/modes/asm \
${LCRYPTO_SRC}/crypto/poly1305/asm \
${LCRYPTO_SRC}/crypto/rc4/asm \
${LCRYPTO_SRC}/crypto/sha/asm \
${LCRYPTO_SRC}/crypto/whrlpool/asm
${LCRYPTO_SRC}/crypto/whrlpool/asm \
${LCRYPTO_SRC}/engines/asm
# aes
SRCS= aes-x86_64.pl aesni-mb-x86_64.pl aesni-sha1-x86_64.pl \
@ -68,8 +71,14 @@ SRCS+= rsaz-avx2.pl rsaz-x86_64.pl x86_64-gf2m.pl x86_64-mont.pl \
# camellia
SRCS+= cmll-x86_64.pl
# chacha
SRCS+= chacha-x86_64.pl
# ec
SRCS+= ecp_nistz256-x86_64.pl
SRCS+= ecp_nistz256-avx2.pl ecp_nistz256-x86_64.pl x25519-x86_64.pl
# engines
SRCS+= e_padlock-x86_64.pl
# md5
SRCS+= md5-x86_64.pl
@ -77,11 +86,16 @@ SRCS+= md5-x86_64.pl
# modes
SRCS+= aesni-gcm-x86_64.pl ghash-x86_64.pl
# poly1305
SRCS+= poly1305-x86_64.pl
# rc4
SRCS+= rc4-md5-x86_64.pl rc4-x86_64.pl
# sha
SRCS+= sha1-mb-x86_64.pl sha1-x86_64.pl sha256-mb-x86_64.pl
SRCS+= keccak1600-avx2.pl keccak1600-avx512.pl keccak1600-avx512vl.pl \
keccak1600-x86_64.pl sha1-mb-x86_64.pl sha1-x86_64.pl \
sha256-mb-x86_64.pl
# whrlpool
SRCS+= wp-x86_64.pl

View File

@ -334,15 +334,23 @@ _x86_64_AES_encrypt_compact:
.hidden asm_AES_encrypt
asm_AES_encrypt:
AES_encrypt:
.cfi_startproc
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
movq %rsp,%r10
leaq -63(%rdx),%rcx
andq $-64,%rsp
subq %rsp,%rcx
@ -352,7 +360,8 @@ AES_encrypt:
subq $32,%rsp
movq %rsi,16(%rsp)
movq %r10,24(%rsp)
movq %rax,24(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x18,0x06,0x23,0x08
.Lenc_prologue:
movq %rdx,%r15
@ -379,20 +388,29 @@ AES_encrypt:
movq 16(%rsp),%r9
movq 24(%rsp),%rsi
.cfi_def_cfa %rsi,8
movl %eax,0(%r9)
movl %ebx,4(%r9)
movl %ecx,8(%r9)
movl %edx,12(%r9)
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lenc_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size AES_encrypt,.-AES_encrypt
.type _x86_64_AES_decrypt,@function
.align 16
@ -781,15 +799,23 @@ _x86_64_AES_decrypt_compact:
.hidden asm_AES_decrypt
asm_AES_decrypt:
AES_decrypt:
.cfi_startproc
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
movq %rsp,%r10
leaq -63(%rdx),%rcx
andq $-64,%rsp
subq %rsp,%rcx
@ -799,7 +825,8 @@ AES_decrypt:
subq $32,%rsp
movq %rsi,16(%rsp)
movq %r10,24(%rsp)
movq %rax,24(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x18,0x06,0x23,0x08
.Ldec_prologue:
movq %rdx,%r15
@ -828,42 +855,69 @@ AES_decrypt:
movq 16(%rsp),%r9
movq 24(%rsp),%rsi
.cfi_def_cfa %rsi,8
movl %eax,0(%r9)
movl %ebx,4(%r9)
movl %ecx,8(%r9)
movl %edx,12(%r9)
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Ldec_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size AES_decrypt,.-AES_decrypt
/*
 * AES_set_encrypt_key -- exported wrapper around the file-local routine
 * _x86_64_AES_set_encrypt_key.
 *
 * The wrapper changes no register other than %rsp before the call, so
 * whatever the caller placed in the argument registers reaches the callee
 * untouched.  It only builds a 56-byte save area and describes it with CFI.
 *
 * Save area after the prologue (offsets from %rsp):
 *    0  scratch slot (subq $8)
 *    8  %r15     16  %r14     24  %r13     32  %r12
 *   40  %rbp     48  %rbx     56  return address
 *
 * NOTE(review): both the private_AES_set_encrypt_key and the
 * AES_set_encrypt_key spellings of .globl/.type/label/.size occur in this
 * span.  This looks like the old and new side of a symbol rename shown
 * together -- confirm which set belongs to the current file.
 */
.globl private_AES_set_encrypt_key
.type private_AES_set_encrypt_key,@function
.globl AES_set_encrypt_key
.type AES_set_encrypt_key,@function
.align 16
private_AES_set_encrypt_key:
AES_set_encrypt_key:
.cfi_startproc
/* Each push moves the CFA 8 bytes further from %rsp; the .cfi_offset that
   follows records the slot, relative to the CFA, holding the saved value. */
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
/* Seventh 8-byte slot: 56 bytes in total below the return address.
   Assuming the SysV entry state (%rsp = 8 mod 16), this leaves %rsp
   16-byte aligned at the call -- TODO confirm the intended ABI. */
subq $8,%rsp
.cfi_adjust_cfa_offset 8
.Lenc_key_prologue:
call _x86_64_AES_set_encrypt_key
/* Only %rbp and %rbx are reloaded; %r12-%r15 are saved above but not
   restored here.  Presumably the callee leaves them intact -- verify
   against _x86_64_AES_set_encrypt_key, whose body is not fully visible. */
movq 40(%rsp),%rbp
.cfi_restore %rbp
movq 48(%rsp),%rbx
.cfi_restore %rbx
/* Drop the whole 56-byte save area in one step; CFA is back at %rsp+8. */
addq $56,%rsp
.cfi_adjust_cfa_offset -56
.Lenc_key_epilogue:
/* 0xf3 0xc3 is the encoding of "rep ret" (rep prefix followed by ret). */
.byte 0xf3,0xc3
.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
.cfi_endproc
.size AES_set_encrypt_key,.-AES_set_encrypt_key
.type _x86_64_AES_set_encrypt_key,@function
.align 16
@ -1104,17 +1158,31 @@ _x86_64_AES_set_encrypt_key:
.Lexit:
.byte 0xf3,0xc3
.size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key
.globl private_AES_set_decrypt_key
.type private_AES_set_decrypt_key,@function
.globl AES_set_decrypt_key
.type AES_set_decrypt_key,@function
.align 16
private_AES_set_decrypt_key:
AES_set_decrypt_key:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
pushq %rdx
.cfi_adjust_cfa_offset 8
.Ldec_key_prologue:
call _x86_64_AES_set_encrypt_key
@ -1282,15 +1350,23 @@ private_AES_set_decrypt_key:
xorq %rax,%rax
.Labort:
movq 8(%rsp),%r15
.cfi_restore %r15
movq 16(%rsp),%r14
.cfi_restore %r14
movq 24(%rsp),%r13
.cfi_restore %r13
movq 32(%rsp),%r12
.cfi_restore %r12
movq 40(%rsp),%rbp
.cfi_restore %rbp
movq 48(%rsp),%rbx
.cfi_restore %rbx
addq $56,%rsp
.cfi_adjust_cfa_offset -56
.Ldec_key_epilogue:
.byte 0xf3,0xc3
.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
.cfi_endproc
.size AES_set_decrypt_key,.-AES_set_decrypt_key
.globl AES_cbc_encrypt
.type AES_cbc_encrypt,@function
.align 16
@ -1299,25 +1375,39 @@ private_AES_set_decrypt_key:
.hidden asm_AES_cbc_encrypt
asm_AES_cbc_encrypt:
AES_cbc_encrypt:
.cfi_startproc
cmpq $0,%rdx
je .Lcbc_epilogue
pushfq
.cfi_adjust_cfa_offset 8
.cfi_offset 49,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-32
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-40
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-48
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-56
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-64
.Lcbc_prologue:
cld
movl %r9d,%r9d
leaq .LAES_Te(%rip),%r14
leaq .LAES_Td(%rip),%r10
cmpq $0,%r9
jne .Lcbc_picked_te
leaq .LAES_Td(%rip),%r14
.Lcbc_picked_te:
cmoveq %r10,%r14
movl OPENSSL_ia32cap_P(%rip),%r10d
cmpq $512,%rdx
@ -1353,8 +1443,10 @@ AES_cbc_encrypt:
.Lcbc_te_ok:
xchgq %rsp,%r15
.cfi_def_cfa_register %r15
movq %r15,16(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x40
.Lcbc_fast_body:
movq %rdi,24(%rsp)
movq %rsi,32(%rsp)
@ -1736,17 +1828,28 @@ AES_cbc_encrypt:
.align 16
.Lcbc_exit:
movq 16(%rsp),%rsi
.cfi_def_cfa %rsi,64
movq (%rsi),%r15
.cfi_restore %r15
movq 8(%rsi),%r14
.cfi_restore %r14
movq 16(%rsi),%r13
.cfi_restore %r13
movq 24(%rsi),%r12
.cfi_restore %r12
movq 32(%rsi),%rbp
.cfi_restore %rbp
movq 40(%rsi),%rbx
.cfi_restore %rbx
leaq 48(%rsi),%rsp
.cfi_def_cfa %rsp,16
.Lcbc_popfq:
popfq
.cfi_adjust_cfa_offset -8
.cfi_restore 49
.Lcbc_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size AES_cbc_encrypt,.-AES_cbc_encrypt
.align 64
.LAES_Te:

View File

@ -317,17 +317,25 @@ _aesni_ctr32_ghash_6x:
.type aesni_gcm_decrypt,@function
.align 32
aesni_gcm_decrypt:
.cfi_startproc
xorq %r10,%r10
cmpq $0x60,%rdx
jb .Lgcm_dec_abort
leaq (%rsp),%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
vzeroupper
vmovdqu (%r8),%xmm1
@ -389,15 +397,23 @@ aesni_gcm_decrypt:
vzeroupper
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lgcm_dec_abort:
movq %r10,%rax
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_gcm_decrypt,.-aesni_gcm_decrypt
.type _aesni_ctr32_6x,@function
.align 32
@ -494,17 +510,25 @@ _aesni_ctr32_6x:
.type aesni_gcm_encrypt,@function
.align 32
aesni_gcm_encrypt:
.cfi_startproc
xorq %r10,%r10
cmpq $288,%rdx
jb .Lgcm_enc_abort
leaq (%rsp),%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
vzeroupper
vmovdqu (%r8),%xmm1
@ -730,15 +754,23 @@ aesni_gcm_encrypt:
vzeroupper
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lgcm_enc_abort:
movq %r10,%rax
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
.align 64
.Lbswap_mask:

View File

@ -8,6 +8,7 @@
.type aesni_multi_cbc_encrypt,@function
.align 32
aesni_multi_cbc_encrypt:
.cfi_startproc
cmpl $2,%edx
jb .Lenc_non_avx
movl OPENSSL_ia32cap_P+4(%rip),%ecx
@ -17,12 +18,19 @@ aesni_multi_cbc_encrypt:
.align 16
.Lenc_non_avx:
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
@ -32,6 +40,7 @@ aesni_multi_cbc_encrypt:
subq $48,%rsp
andq $-64,%rsp
movq %rax,16(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x08
.Lenc4x_body:
movdqu (%rsi),%xmm12
@ -241,6 +250,7 @@ aesni_multi_cbc_encrypt:
jnz .Loop_enc4x
movq 16(%rsp),%rax
.cfi_def_cfa %rax,8
movl 24(%rsp),%edx
@ -258,20 +268,29 @@ aesni_multi_cbc_encrypt:
.Lenc4x_done:
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lenc4x_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_multi_cbc_encrypt,.-aesni_multi_cbc_encrypt
.globl aesni_multi_cbc_decrypt
.type aesni_multi_cbc_decrypt,@function
.align 32
aesni_multi_cbc_decrypt:
.cfi_startproc
cmpl $2,%edx
jb .Ldec_non_avx
movl OPENSSL_ia32cap_P+4(%rip),%ecx
@ -281,12 +300,19 @@ aesni_multi_cbc_decrypt:
.align 16
.Ldec_non_avx:
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
@ -296,6 +322,7 @@ aesni_multi_cbc_decrypt:
subq $48,%rsp
andq $-64,%rsp
movq %rax,16(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x08
.Ldec4x_body:
movdqu (%rsi),%xmm12
@ -505,6 +532,7 @@ aesni_multi_cbc_decrypt:
jnz .Loop_dec4x
movq 16(%rsp),%rax
.cfi_def_cfa %rax,8
movl 24(%rsp),%edx
leaq 160(%rdi),%rdi
@ -513,26 +541,42 @@ aesni_multi_cbc_decrypt:
.Ldec4x_done:
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Ldec4x_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt
.type aesni_multi_cbc_encrypt_avx,@function
.align 32
aesni_multi_cbc_encrypt_avx:
.cfi_startproc
_avx_cbc_enc_shortcut:
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
@ -544,6 +588,7 @@ _avx_cbc_enc_shortcut:
subq $192,%rsp
andq $-128,%rsp
movq %rax,16(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x08
.Lenc8x_body:
vzeroupper
@ -941,6 +986,7 @@ _avx_cbc_enc_shortcut:
jnz .Loop_enc8x
movq 16(%rsp),%rax
.cfi_def_cfa %rax,8
@ -949,27 +995,43 @@ _avx_cbc_enc_shortcut:
.Lenc8x_done:
vzeroupper
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lenc8x_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_multi_cbc_encrypt_avx,.-aesni_multi_cbc_encrypt_avx
.type aesni_multi_cbc_decrypt_avx,@function
.align 32
aesni_multi_cbc_decrypt_avx:
.cfi_startproc
_avx_cbc_dec_shortcut:
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
@ -983,6 +1045,7 @@ _avx_cbc_dec_shortcut:
andq $-256,%rsp
subq $192,%rsp
movq %rax,16(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x08
.Ldec8x_body:
vzeroupper
@ -1418,6 +1481,7 @@ _avx_cbc_dec_shortcut:
jnz .Loop_dec8x
movq 16(%rsp),%rax
.cfi_def_cfa %rax,8
@ -1426,12 +1490,20 @@ _avx_cbc_dec_shortcut:
.Ldec8x_done:
vzeroupper
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Ldec8x_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_multi_cbc_decrypt_avx,.-aesni_multi_cbc_decrypt_avx

View File

@ -23,16 +23,30 @@ aesni_cbc_sha1_enc:
.type aesni_cbc_sha1_enc_ssse3,@function
.align 32
aesni_cbc_sha1_enc_ssse3:
.cfi_startproc
movq 8(%rsp),%r10
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
leaq -104(%rsp),%rsp
.cfi_adjust_cfa_offset 104
movq %rdi,%r12
@ -1364,29 +1378,52 @@ aesni_cbc_sha1_enc_ssse3:
movl %ebp,16(%r9)
movups %xmm2,(%r8)
leaq 104(%rsp),%rsi
.cfi_def_cfa %rsi,56
movq 0(%rsi),%r15
.cfi_restore %r15
movq 8(%rsi),%r14
.cfi_restore %r14
movq 16(%rsi),%r13
.cfi_restore %r13
movq 24(%rsi),%r12
.cfi_restore %r12
movq 32(%rsi),%rbp
.cfi_restore %rbp
movq 40(%rsi),%rbx
.cfi_restore %rbx
leaq 48(%rsi),%rsp
.cfi_def_cfa %rsp,8
.Lepilogue_ssse3:
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3
.type aesni_cbc_sha1_enc_avx,@function
.align 32
aesni_cbc_sha1_enc_avx:
.cfi_startproc
movq 8(%rsp),%r10
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
leaq -104(%rsp),%rsp
.cfi_adjust_cfa_offset 104
vzeroall
@ -2662,15 +2699,24 @@ aesni_cbc_sha1_enc_avx:
vmovups %xmm12,(%r8)
vzeroall
leaq 104(%rsp),%rsi
.cfi_def_cfa %rsi,56
movq 0(%rsi),%r15
.cfi_restore %r15
movq 8(%rsi),%r14
.cfi_restore %r14
movq 16(%rsi),%r13
.cfi_restore %r13
movq 24(%rsi),%r12
.cfi_restore %r12
movq 32(%rsi),%rbp
.cfi_restore %rbp
movq 40(%rsi),%rbx
.cfi_restore %rbx
leaq 48(%rsi),%rsp
.cfi_def_cfa %rsp,8
.Lepilogue_avx:
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_cbc_sha1_enc_avx,.-aesni_cbc_sha1_enc_avx
.align 64
K_XX_XX:

View File

@ -79,15 +79,23 @@ K256:
.type aesni_cbc_sha256_enc_xop,@function
.align 64
aesni_cbc_sha256_enc_xop:
.cfi_startproc
.Lxop_shortcut:
movq 8(%rsp),%r10
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
movq %rsp,%r11
.cfi_offset %r15,-56
subq $128,%rsp
andq $-64,%rsp
@ -103,7 +111,8 @@ aesni_cbc_sha256_enc_xop:
movq %r8,64+32(%rsp)
movq %r9,64+40(%rsp)
movq %r10,64+48(%rsp)
movq %r11,64+56(%rsp)
movq %rax,120(%rsp)
.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08
.Lprologue_xop:
vzeroall
@ -1209,31 +1218,48 @@ aesni_cbc_sha256_enc_xop:
jb .Lloop_xop
movq 64+32(%rsp),%r8
movq 64+56(%rsp),%rsi
movq 120(%rsp),%rsi
.cfi_def_cfa %rsi,8
vmovdqu %xmm8,(%r8)
vzeroall
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue_xop:
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_cbc_sha256_enc_xop,.-aesni_cbc_sha256_enc_xop
.type aesni_cbc_sha256_enc_avx,@function
.align 64
aesni_cbc_sha256_enc_avx:
.cfi_startproc
.Lavx_shortcut:
movq 8(%rsp),%r10
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
movq %rsp,%r11
.cfi_offset %r15,-56
subq $128,%rsp
andq $-64,%rsp
@ -1249,7 +1275,8 @@ aesni_cbc_sha256_enc_avx:
movq %r8,64+32(%rsp)
movq %r9,64+40(%rsp)
movq %r10,64+48(%rsp)
movq %r11,64+56(%rsp)
movq %rax,120(%rsp)
.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08
.Lprologue_avx:
vzeroall
@ -2386,31 +2413,48 @@ aesni_cbc_sha256_enc_avx:
jb .Lloop_avx
movq 64+32(%rsp),%r8
movq 64+56(%rsp),%rsi
movq 120(%rsp),%rsi
.cfi_def_cfa %rsi,8
vmovdqu %xmm8,(%r8)
vzeroall
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue_avx:
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_cbc_sha256_enc_avx,.-aesni_cbc_sha256_enc_avx
.type aesni_cbc_sha256_enc_avx2,@function
.align 64
aesni_cbc_sha256_enc_avx2:
.cfi_startproc
.Lavx2_shortcut:
movq 8(%rsp),%r10
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
movq %rsp,%r11
.cfi_offset %r15,-56
subq $576,%rsp
andq $-1024,%rsp
addq $448,%rsp
@ -2427,7 +2471,8 @@ aesni_cbc_sha256_enc_avx2:
movq %r8,64+32(%rsp)
movq %r9,64+40(%rsp)
movq %r10,64+48(%rsp)
movq %r11,64+56(%rsp)
movq %rax,120(%rsp)
.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08
.Lprologue_avx2:
vzeroall
@ -3989,18 +4034,27 @@ aesni_cbc_sha256_enc_avx2:
.Ldone_avx2:
leaq (%rbp),%rsp
movq 64+32(%rsp),%r8
movq 64+56(%rsp),%rsi
movq 120(%rsp),%rsi
.cfi_def_cfa %rsi,8
vmovdqu %xmm8,(%r8)
vzeroall
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue_avx2:
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_cbc_sha256_enc_avx2,.-aesni_cbc_sha256_enc_avx2
.type aesni_cbc_sha256_enc_shaext,@function
.align 32

File diff suppressed because it is too large Load Diff

View File

@ -1069,6 +1069,7 @@ _bsaes_key_convert:
.type bsaes_cbc_encrypt,@function
.align 16
bsaes_cbc_encrypt:
.cfi_startproc
cmpl $0,%r9d
jne asm_AES_cbc_encrypt
cmpq $128,%rdx
@ -1077,13 +1078,27 @@ bsaes_cbc_encrypt:
movq %rsp,%rax
.Lcbc_dec_prologue:
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
leaq -72(%rsp),%rsp
.cfi_adjust_cfa_offset 0x48
movq %rsp,%rbp
.cfi_def_cfa_register %rbp
movl 240(%rcx),%eax
movq %rdi,%r12
movq %rsi,%r13
@ -1302,33 +1317,56 @@ bsaes_cbc_encrypt:
cmpq %rax,%rbp
ja .Lcbc_dec_bzero
leaq (%rbp),%rsp
movq 72(%rsp),%r15
movq 80(%rsp),%r14
movq 88(%rsp),%r13
movq 96(%rsp),%r12
movq 104(%rsp),%rbx
movq 112(%rsp),%rax
leaq 120(%rsp),%rsp
movq %rax,%rbp
leaq 120(%rbp),%rax
.cfi_def_cfa %rax,8
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbx
.cfi_restore %rbx
movq -8(%rax),%rbp
.cfi_restore %rbp
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lcbc_dec_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
.globl bsaes_ctr32_encrypt_blocks
.type bsaes_ctr32_encrypt_blocks,@function
.align 16
bsaes_ctr32_encrypt_blocks:
.cfi_startproc
movq %rsp,%rax
.Lctr_enc_prologue:
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
leaq -72(%rsp),%rsp
.cfi_adjust_cfa_offset 0x48
movq %rsp,%rbp
.cfi_def_cfa_register %rbp
movdqu (%r8),%xmm0
movl 240(%rcx),%eax
movq %rdi,%r12
@ -1502,32 +1540,55 @@ bsaes_ctr32_encrypt_blocks:
cmpq %rax,%rbp
ja .Lctr_enc_bzero
leaq (%rbp),%rsp
movq 72(%rsp),%r15
movq 80(%rsp),%r14
movq 88(%rsp),%r13
movq 96(%rsp),%r12
movq 104(%rsp),%rbx
movq 112(%rsp),%rax
leaq 120(%rsp),%rsp
movq %rax,%rbp
leaq 120(%rbp),%rax
.cfi_def_cfa %rax,8
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbx
.cfi_restore %rbx
movq -8(%rax),%rbp
.cfi_restore %rbp
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lctr_enc_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
.globl bsaes_xts_encrypt
.type bsaes_xts_encrypt,@function
.align 16
bsaes_xts_encrypt:
.cfi_startproc
movq %rsp,%rax
.Lxts_enc_prologue:
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
leaq -72(%rsp),%rsp
.cfi_adjust_cfa_offset 0x48
movq %rsp,%rbp
.cfi_def_cfa_register %rbp
movq %rdi,%r12
movq %rsi,%r13
movq %rdx,%r14
@ -1953,32 +2014,54 @@ bsaes_xts_encrypt:
cmpq %rax,%rbp
ja .Lxts_enc_bzero
leaq (%rbp),%rsp
movq 72(%rsp),%r15
movq 80(%rsp),%r14
movq 88(%rsp),%r13
movq 96(%rsp),%r12
movq 104(%rsp),%rbx
movq 112(%rsp),%rax
leaq 120(%rsp),%rsp
movq %rax,%rbp
leaq 120(%rbp),%rax
.cfi_def_cfa %rax,8
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbx
.cfi_restore %rbx
movq -8(%rax),%rbp
.cfi_restore %rbp
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lxts_enc_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size bsaes_xts_encrypt,.-bsaes_xts_encrypt
.globl bsaes_xts_decrypt
.type bsaes_xts_decrypt,@function
.align 16
bsaes_xts_decrypt:
.cfi_startproc
movq %rsp,%rax
.Lxts_dec_prologue:
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
leaq -72(%rsp),%rsp
.cfi_adjust_cfa_offset 0x48
movq %rsp,%rbp
movq %rdi,%r12
movq %rsi,%r13
@ -2431,17 +2514,25 @@ bsaes_xts_decrypt:
cmpq %rax,%rbp
ja .Lxts_dec_bzero
leaq (%rbp),%rsp
movq 72(%rsp),%r15
movq 80(%rsp),%r14
movq 88(%rsp),%r13
movq 96(%rsp),%r12
movq 104(%rsp),%rbx
movq 112(%rsp),%rax
leaq 120(%rsp),%rsp
movq %rax,%rbp
leaq 120(%rbp),%rax
.cfi_def_cfa %rax,8
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbx
.cfi_restore %rbx
movq -8(%rax),%rbp
.cfi_restore %rbp
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lxts_dec_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size bsaes_xts_decrypt,.-bsaes_xts_decrypt
.type _bsaes_const,@object
.align 64

File diff suppressed because it is too large Load Diff

View File

@ -19,11 +19,22 @@ Camellia_EncryptBlock:
.align 16
.Lenc_rounds:
Camellia_EncryptBlock_Rounds:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-32
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-40
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-48
.Lenc_prologue:
@ -55,13 +66,20 @@ Camellia_EncryptBlock_Rounds:
movl %r11d,12(%r13)
movq 0(%rsp),%r15
.cfi_restore %r15
movq 8(%rsp),%r14
.cfi_restore %r14
movq 16(%rsp),%r13
.cfi_restore %r13
movq 24(%rsp),%rbp
.cfi_restore %rbp
movq 32(%rsp),%rbx
.cfi_restore %rbx
leaq 40(%rsp),%rsp
.cfi_adjust_cfa_offset -40
.Lenc_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size Camellia_EncryptBlock_Rounds,.-Camellia_EncryptBlock_Rounds
.type _x86_64_Camellia_encrypt,@function
@ -288,11 +306,22 @@ Camellia_DecryptBlock:
.align 16
.Ldec_rounds:
Camellia_DecryptBlock_Rounds:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-32
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-40
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-48
.Ldec_prologue:
@ -324,13 +353,20 @@ Camellia_DecryptBlock_Rounds:
movl %r11d,12(%r13)
movq 0(%rsp),%r15
.cfi_restore %r15
movq 8(%rsp),%r14
.cfi_restore %r14
movq 16(%rsp),%r13
.cfi_restore %r13
movq 24(%rsp),%rbp
.cfi_restore %rbp
movq 32(%rsp),%rbx
.cfi_restore %rbx
leaq 40(%rsp),%rsp
.cfi_adjust_cfa_offset -40
.Ldec_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size Camellia_DecryptBlock_Rounds,.-Camellia_DecryptBlock_Rounds
.type _x86_64_Camellia_decrypt,@function
@ -544,11 +580,22 @@ _x86_64_Camellia_decrypt:
.type Camellia_Ekeygen,@function
.align 16
Camellia_Ekeygen:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-32
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-40
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-48
.Lkey_prologue:
movl %edi,%r15d
@ -1076,13 +1123,20 @@ Camellia_Ekeygen:
movl $4,%eax
.Ldone:
movq 0(%rsp),%r15
.cfi_restore %r15
movq 8(%rsp),%r14
.cfi_restore %r14
movq 16(%rsp),%r13
.cfi_restore %r13
movq 24(%rsp),%rbp
.cfi_restore %rbp
movq 32(%rsp),%rbx
.cfi_restore %rbx
leaq 40(%rsp),%rsp
.cfi_adjust_cfa_offset -40
.Lkey_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size Camellia_Ekeygen,.-Camellia_Ekeygen
.align 64
.LCamellia_SIGMA:
@ -1607,17 +1661,31 @@ Camellia_Ekeygen:
.type Camellia_cbc_encrypt,@function
.align 16
Camellia_cbc_encrypt:
.cfi_startproc
cmpq $0,%rdx
je .Lcbc_abort
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
.Lcbc_prologue:
movq %rsp,%rbp
.cfi_def_cfa_register %rbp
subq $64,%rsp
andq $-64,%rsp
@ -1638,6 +1706,7 @@ Camellia_cbc_encrypt:
movq %r8,40(%rsp)
movq %rbp,48(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x30,0x06,0x23,0x38
.Lcbc_body:
leaq .LCamellia_SBOX(%rip),%rbp
@ -1826,15 +1895,24 @@ Camellia_cbc_encrypt:
.align 16
.Lcbc_done:
movq 48(%rsp),%rcx
.cfi_def_cfa %rcx,56
movq 0(%rcx),%r15
.cfi_restore %r15
movq 8(%rcx),%r14
.cfi_restore %r14
movq 16(%rcx),%r13
.cfi_restore %r13
movq 24(%rcx),%r12
.cfi_restore %r12
movq 32(%rcx),%rbp
.cfi_restore %rbp
movq 40(%rcx),%rbx
.cfi_restore %rbx
leaq 48(%rcx),%rsp
.cfi_def_cfa %rsp,8
.Lcbc_abort:
.byte 0xf3,0xc3
.cfi_endproc
.size Camellia_cbc_encrypt,.-Camellia_cbc_encrypt
.byte 67,97,109,101,108,108,105,97,32,102,111,114,32,120,56,54,95,54,52,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -7,9 +7,27 @@
.type gcm_gmult_4bit,@function
.align 16
gcm_gmult_4bit:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $280,%rsp
.cfi_adjust_cfa_offset 280
.Lgmult_prologue:
movzbq 15(%rdi),%r8
@ -86,22 +104,41 @@ gcm_gmult_4bit:
movq %r8,8(%rdi)
movq %r9,(%rdi)
movq 16(%rsp),%rbx
leaq 24(%rsp),%rsp
leaq 280+48(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lgmult_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_gmult_4bit,.-gcm_gmult_4bit
.globl gcm_ghash_4bit
.type gcm_ghash_4bit,@function
.align 16
gcm_ghash_4bit:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $280,%rsp
.cfi_adjust_cfa_offset 280
.Lghash_prologue:
movq %rdx,%r14
movq %rcx,%r15
@ -646,16 +683,25 @@ gcm_ghash_4bit:
movq %r8,8(%rdi)
movq %r9,(%rdi)
leaq 280(%rsp),%rsi
movq 0(%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
leaq 280+48(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq 0(%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lghash_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_4bit,.-gcm_ghash_4bit
.globl gcm_init_clmul
.type gcm_init_clmul,@function

View File

@ -0,0 +1,2 @@
/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from keccak1600-avx2.pl. */

View File

@ -0,0 +1,2 @@
/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from keccak1600-avx512.pl. */

View File

@ -0,0 +1,2 @@
/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from keccak1600-avx512vl.pl. */

View File

@ -0,0 +1,524 @@
/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from keccak1600-x86_64.pl. */
.text
/*
 * __KeccakF1600 - inner round loop of the Keccak-f[1600] permutation.
 *
 * File-local helper with a private register contract (not the SysV ABI):
 *   %rdi  current state, biased by +100: the 25 64-bit lanes are addressed
 *         as -100(%rdi) .. 92(%rdi) in steps of 8
 *   %rsi  second 200-byte buffer with the same +100 bias; every round reads
 *         through %rdi and writes through %rsi, then the two are exchanged
 *   %r15  pointer to the next 64-bit round constant; advanced by 8 per round
 *
 * The loop stops once the low 8 bits of %r15 are zero and %r15 is then
 * rewound by 192 bytes (24 constants). With the iotas table as laid out in
 * this file that is 24 rounds, i.e. an even number of buffer exchanges, so
 * %rdi/%rsi are back to their entry values on return.
 *
 * Clobbers %rax, %rbx, %rcx, %rdx, %rbp, %r8-%r14 and flags; nothing is
 * saved here, the callers in this file save the callee-saved registers.
 * NOTE(review): both callers complement six lanes before and after the call,
 * which presumably is the "lane complementing" form of chi - confirm against
 * keccak1600-x86_64.pl.
 */
.type __KeccakF1600,@function
.align 32
__KeccakF1600:
/* Preload the last five lanes (offsets 60..92); they seed the column sums. */
movq 60(%rdi),%rax
movq 68(%rdi),%rbx
movq 76(%rdi),%rcx
movq 84(%rdi),%rdx
movq 92(%rdi),%rbp
jmp .Loop
.align 32
.Loop:
/* Fetch the lanes at -100, -52, -4 and 44; they are reused below. */
movq -100(%rdi),%r8
movq -52(%rdi),%r9
movq -4(%rdi),%r10
movq 44(%rdi),%r11
/*
 * Theta, part 1: XOR every fifth lane together so that %rax, %rbx, %rcx,
 * %rdx, %rbp end up holding the five column parities C0..C4.
 * %r12 keeps a copy of the unmodified lane at offset 92.
 */
xorq -84(%rdi),%rcx
xorq -76(%rdi),%rdx
xorq %r8,%rax
xorq -92(%rdi),%rbx
xorq -44(%rdi),%rcx
xorq -60(%rdi),%rax
movq %rbp,%r12
xorq -68(%rdi),%rbp
xorq %r10,%rcx
xorq -20(%rdi),%rax
xorq -36(%rdi),%rdx
xorq %r9,%rbx
xorq -28(%rdi),%rbp
xorq 36(%rdi),%rcx
xorq 20(%rdi),%rax
xorq 4(%rdi),%rdx
xorq -12(%rdi),%rbx
xorq 12(%rdi),%rbp
/*
 * Theta, part 2: combine rotated parities. Afterwards
 *   %rcx = rol(C2,1)^C0   %rax = rol(C0,1)^C3   %rdx = rol(C3,1)^C1
 *   %rbx = rol(C1,1)^C4   %rbp = rol(C4,1)^C2   (%r13 holds the old C2)
 * The last two column XORs (offsets 28 and 52) are interleaved here.
 */
movq %rcx,%r13
rolq $1,%rcx
xorq %rax,%rcx
xorq %r11,%rdx
rolq $1,%rax
xorq %rdx,%rax
xorq 28(%rdi),%rbx
rolq $1,%rdx
xorq %rbx,%rdx
xorq 52(%rdi),%rbp
rolq $1,%rbx
xorq %rbp,%rbx
rolq $1,%rbp
xorq %r13,%rbp
/*
 * Output lanes -100..-68(%rsi): apply the theta terms to the five fetched
 * lanes, rotate them, and mix them with and/or/not. The round constant at
 * (%r15) is XORed into the lane stored at -100(%rsi).
 */
xorq %rcx,%r9
xorq %rdx,%r10
rolq $44,%r9
xorq %rbp,%r11
xorq %rax,%r12
rolq $43,%r10
xorq %rbx,%r8
movq %r9,%r13
rolq $21,%r11
orq %r10,%r9
xorq %r8,%r9
rolq $14,%r12
xorq (%r15),%r9
leaq 8(%r15),%r15
movq %r12,%r14
andq %r11,%r12
movq %r9,-100(%rsi)
xorq %r10,%r12
notq %r10
movq %r12,-84(%rsi)
orq %r11,%r10
movq 76(%rdi),%r12
xorq %r13,%r10
movq %r10,-92(%rsi)
andq %r8,%r13
movq -28(%rdi),%r9
xorq %r14,%r13
movq -20(%rdi),%r10
movq %r13,-68(%rsi)
orq %r8,%r14
movq -76(%rdi),%r8
xorq %r11,%r14
movq 28(%rdi),%r11
movq %r14,-76(%rsi)
/* Output lanes -60..-28(%rsi), built from inputs at -76, 76, 28, -28, -20. */
xorq %rbp,%r8
xorq %rdx,%r12
rolq $28,%r8
xorq %rcx,%r11
xorq %rax,%r9
rolq $61,%r12
rolq $45,%r11
xorq %rbx,%r10
rolq $20,%r9
movq %r8,%r13
orq %r12,%r8
rolq $3,%r10
xorq %r11,%r8
movq %r8,-36(%rsi)
movq %r9,%r14
andq %r13,%r9
movq -92(%rdi),%r8
xorq %r12,%r9
notq %r12
movq %r9,-28(%rsi)
orq %r11,%r12
movq -44(%rdi),%r9
xorq %r10,%r12
movq %r12,-44(%rsi)
andq %r10,%r11
movq 60(%rdi),%r12
xorq %r14,%r11
movq %r11,-52(%rsi)
orq %r10,%r14
movq 4(%rdi),%r10
xorq %r13,%r14
movq 52(%rdi),%r11
movq %r14,-60(%rsi)
/* Output lanes -20..12(%rsi), built from inputs at 4, 52, -44, 60, -92. */
xorq %rbp,%r10
xorq %rax,%r11
rolq $25,%r10
xorq %rdx,%r9
rolq $8,%r11
xorq %rbx,%r12
rolq $6,%r9
xorq %rcx,%r8
rolq $18,%r12
movq %r10,%r13
andq %r11,%r10
rolq $1,%r8
notq %r11
xorq %r9,%r10
movq %r10,-12(%rsi)
movq %r12,%r14
andq %r11,%r12
movq -12(%rdi),%r10
xorq %r13,%r12
movq %r12,-4(%rsi)
orq %r9,%r13
movq 84(%rdi),%r12
xorq %r8,%r13
movq %r13,-20(%rsi)
andq %r8,%r9
xorq %r14,%r9
movq %r9,12(%rsi)
orq %r8,%r14
movq -60(%rdi),%r9
xorq %r11,%r14
movq 36(%rdi),%r11
movq %r14,4(%rsi)
movq -68(%rdi),%r8
/* Output lanes 20..52(%rsi), built from inputs at -12, 36, -60, 84, -68. */
xorq %rcx,%r10
xorq %rdx,%r11
rolq $10,%r10
xorq %rbx,%r9
rolq $15,%r11
xorq %rbp,%r12
rolq $36,%r9
xorq %rax,%r8
rolq $56,%r12
movq %r10,%r13
orq %r11,%r10
rolq $27,%r8
notq %r11
xorq %r9,%r10
movq %r10,28(%rsi)
movq %r12,%r14
orq %r11,%r12
xorq %r13,%r12
movq %r12,36(%rsi)
andq %r9,%r13
xorq %r8,%r13
movq %r13,20(%rsi)
orq %r8,%r9
xorq %r14,%r9
movq %r9,52(%rsi)
andq %r14,%r8
xorq %r11,%r8
movq %r8,44(%rsi)
/*
 * Last five output lanes (60..92). The theta terms themselves are turned
 * into lanes by XORing in the inputs at -84, -36, 68, 12, 20. The buffers
 * are exchanged before the stores, so these stores go through %rdi into
 * what is now the current state.
 */
xorq -84(%rdi),%rdx
xorq -36(%rdi),%rbp
rolq $62,%rdx
xorq 68(%rdi),%rcx
rolq $55,%rbp
xorq 12(%rdi),%rax
rolq $2,%rcx
xorq 20(%rdi),%rbx
xchgq %rsi,%rdi
rolq $39,%rax
rolq $41,%rbx
movq %rdx,%r13
andq %rbp,%rdx
notq %rbp
xorq %rcx,%rdx
movq %rdx,92(%rdi)
movq %rax,%r14
andq %rbp,%rax
xorq %r13,%rax
movq %rax,60(%rdi)
orq %rcx,%r13
xorq %rbx,%r13
movq %r13,84(%rdi)
andq %rbx,%rcx
xorq %r14,%rcx
movq %rcx,76(%rdi)
orq %r14,%rbx
xorq %rbp,%rbx
movq %rbx,68(%rdi)
/*
 * Re-establish the loop-top invariant: %rax/%rbx/%rcx/%rdx/%rbp must hold
 * the lanes just stored at 60/68/76/84/92, exactly as the preload did.
 */
movq %rdx,%rbp
movq %r13,%rdx
/* Continue until the constant pointer reaches a 256-byte boundary. */
testq $255,%r15
jnz .Loop
/* Rewind the constant pointer by 24 entries for the next invocation. */
leaq -192(%r15),%r15
.byte 0xf3,0xc3
.size __KeccakF1600,.-__KeccakF1600
/*
 * KeccakF1600 - apply the permutation once to a 200-byte state.
 *
 * File-local (no .globl). In: %rdi = state (25 x 64-bit lanes).
 * %rdi is returned unchanged (biased by +100 on entry, un-biased on exit);
 * SHA3_squeeze relies on that. All callee-saved registers are saved and
 * restored here because __KeccakF1600 clobbers them. %rsi, %r8-%r11, %rax,
 * %rcx, %rdx and flags are not preserved.
 *
 * Stack: 6 pushes + 200 bytes of scratch state used as the second buffer
 * of __KeccakF1600.
 */
.type KeccakF1600,@function
.align 32
KeccakF1600:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
/* Bias the state pointer so that all 25 lanes are reachable as -100..92. */
leaq 100(%rdi),%rdi
subq $200,%rsp
.cfi_adjust_cfa_offset 200
/*
 * Complement six lanes before the rounds; the same six are complemented
 * again afterwards. NOTE(review): presumably the lane-complementing
 * representation expected by __KeccakF1600 - confirm against the .pl source.
 */
notq -92(%rdi)
notq -84(%rdi)
notq -36(%rdi)
notq -4(%rdi)
notq 36(%rdi)
notq 60(%rdi)
/* %r15 = round constants, %rsi = biased pointer to the scratch state. */
leaq iotas(%rip),%r15
leaq 100(%rsp),%rsi
call __KeccakF1600
/* Undo the complementing of the six lanes. */
notq -92(%rdi)
notq -84(%rdi)
notq -36(%rdi)
notq -4(%rdi)
notq 36(%rdi)
notq 60(%rdi)
/* Remove the bias so the caller sees its original state pointer. */
leaq -100(%rdi),%rdi
addq $200,%rsp
.cfi_adjust_cfa_offset -200
popq %r15
.cfi_adjust_cfa_offset -8
.cfi_restore %r15
popq %r14
.cfi_adjust_cfa_offset -8
.cfi_restore %r14
popq %r13
.cfi_adjust_cfa_offset -8
.cfi_restore %r13
popq %r12
.cfi_adjust_cfa_offset -8
.cfi_restore %r12
popq %rbp
.cfi_adjust_cfa_offset -8
.cfi_restore %rbp
popq %rbx
.cfi_adjust_cfa_offset -8
.cfi_restore %rbx
.byte 0xf3,0xc3
.cfi_endproc
.size KeccakF1600,.-KeccakF1600
/*
 * SHA3_absorb - XOR whole input blocks into the state, permuting after each.
 *
 * In (SysV AMD64 argument registers, as used by the code below):
 *   %rdi = state (200 bytes)   %rsi = input pointer
 *   %rdx = input length        %rcx = block size in bytes
 * Out: %rax = number of input bytes left over (less than one block).
 *
 * Assumes the block size is a non-zero multiple of 8: it is shifted right
 * by 3 and used as a count-down of 64-bit lanes - TODO confirm callers.
 *
 * Frame (232 bytes below the six saved registers):
 *   0..199(%rsp)  scratch state for __KeccakF1600 (%rsi = %rsp + 100)
 *   200(%rsp)     saved input pointer
 *   208(%rsp)     saved remaining length
 *   216(%rsp)     saved block size
 */
.globl SHA3_absorb
.type SHA3_absorb,@function
.align 32
SHA3_absorb:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
/* Bias the state pointer (+100) as __KeccakF1600 expects. */
leaq 100(%rdi),%rdi
subq $232,%rsp
.cfi_adjust_cfa_offset 232
/* %r9 takes over as input pointer; %rsi becomes the scratch-state pointer. */
movq %rsi,%r9
leaq 100(%rsp),%rsi
/* Complement six lanes for the duration of the call (undone at exit). */
notq -92(%rdi)
notq -84(%rdi)
notq -36(%rdi)
notq -4(%rdi)
notq 36(%rdi)
notq 60(%rdi)
leaq iotas(%rip),%r15
/* Keep the block size in the frame; %rcx is consumed as a counter below. */
movq %rcx,216-100(%rsi)
.Loop_absorb:
/* Stop as soon as fewer than one full block remains (unsigned compare). */
cmpq %rcx,%rdx
jc .Ldone_absorb
/* %rcx = lanes per block, %r8 = un-biased pointer to the first lane. */
shrq $3,%rcx
leaq -100(%rdi),%r8
.Lblock_absorb:
/* state[i] ^= input[i], eight bytes at a time. */
movq (%r9),%rax
leaq 8(%r9),%r9
xorq (%r8),%rax
leaq 8(%r8),%r8
subq $8,%rdx
movq %rax,-8(%r8)
subq $1,%rcx
jnz .Lblock_absorb
/* __KeccakF1600 clobbers %r9, %rdx and %rcx: spill and reload them. */
movq %r9,200-100(%rsi)
movq %rdx,208-100(%rsi)
call __KeccakF1600
movq 200-100(%rsi),%r9
movq 208-100(%rsi),%rdx
movq 216-100(%rsi),%rcx
jmp .Loop_absorb
.align 32
.Ldone_absorb:
/* Return value: bytes that did not fill a block. */
movq %rdx,%rax
/* Undo the complementing of the six lanes. */
notq -92(%rdi)
notq -84(%rdi)
notq -36(%rdi)
notq -4(%rdi)
notq 36(%rdi)
notq 60(%rdi)
addq $232,%rsp
.cfi_adjust_cfa_offset -232
popq %r15
.cfi_adjust_cfa_offset -8
.cfi_restore %r15
popq %r14
.cfi_adjust_cfa_offset -8
.cfi_restore %r14
popq %r13
.cfi_adjust_cfa_offset -8
.cfi_restore %r13
popq %r12
.cfi_adjust_cfa_offset -8
.cfi_restore %r12
popq %rbp
.cfi_adjust_cfa_offset -8
.cfi_restore %rbp
popq %rbx
.cfi_adjust_cfa_offset -8
.cfi_restore %rbx
.byte 0xf3,0xc3
.cfi_endproc
.size SHA3_absorb,.-SHA3_absorb
/*
 * SHA3_squeeze - copy output bytes out of the state, permuting the state
 * every time a full block has been emitted.
 *
 * In (SysV AMD64 argument registers, as used by the code below):
 *   %rdi = state (200 bytes)   %rsi = output pointer
 *   %rdx = output length       %rcx = block size in bytes
 *
 * Register roles inside the loop:
 *   %r8  = read cursor into the state     %r12 = write cursor into output
 *   %r13 = bytes still to produce         %r14 = lanes per block (bsz / 8)
 *   %rcx = lanes left in the current block
 * %r12-%r14 are callee-saved and therefore survive the call to KeccakF1600;
 * %r8 and %rcx do not and are reloaded after it. %rdi survives because
 * KeccakF1600 hands it back unchanged.
 *
 * Three pushes on top of the return address leave %rsp 16-byte aligned at
 * the call site.
 */
.globl SHA3_squeeze
.type SHA3_squeeze,@function
.align 32
SHA3_squeeze:
.cfi_startproc
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-16
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-24
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-32
shrq $3,%rcx
movq %rdi,%r8
movq %rsi,%r12
movq %rdx,%r13
movq %rcx,%r14
jmp .Loop_squeeze
.align 32
.Loop_squeeze:
/* Fewer than 8 bytes wanted: finish with a byte copy. */
cmpq $8,%r13
jb .Ltail_squeeze
/* Emit one whole lane. */
movq (%r8),%rax
leaq 8(%r8),%r8
movq %rax,(%r12)
leaq 8(%r12),%r12
subq $8,%r13
jz .Ldone_squeeze
subq $1,%rcx
jnz .Loop_squeeze
/* Block exhausted: permute and restart from the first lane. */
call KeccakF1600
movq %rdi,%r8
movq %r14,%rcx
jmp .Loop_squeeze
.Ltail_squeeze:
/* rep movsb: copy the remaining %r13 (0..7) bytes from the state. */
movq %r8,%rsi
movq %r12,%rdi
movq %r13,%rcx
.byte 0xf3,0xa4
.Ldone_squeeze:
popq %r14
.cfi_adjust_cfa_offset -8
.cfi_restore %r14
popq %r13
.cfi_adjust_cfa_offset -8
.cfi_restore %r13
popq %r12
.cfi_adjust_cfa_offset -8
/* Was ".cfi_restore %r13": the register just popped is %r12. */
.cfi_restore %r12
.byte 0xf3,0xc3
.cfi_endproc
.size SHA3_squeeze,.-SHA3_squeeze
/*
 * Round-constant table for __KeccakF1600.
 *
 * Layout matters: the table starts 64 bytes (eight zero quads) past a
 * 256-byte boundary and holds 24 quads (192 bytes), so the address just
 * past the last constant is 256-byte aligned. __KeccakF1600 detects the
 * end of the rounds with "testq $255,%r15" and rewinds by 192, so neither
 * the alignment, the padding nor the entry count may change independently.
 */
.align 256
.quad 0,0,0,0,0,0,0,0
.type iotas,@object
iotas:
.quad 0x0000000000000001
.quad 0x0000000000008082
.quad 0x800000000000808a
.quad 0x8000000080008000
.quad 0x000000000000808b
.quad 0x0000000080000001
.quad 0x8000000080008081
.quad 0x8000000000008009
.quad 0x000000000000008a
.quad 0x0000000000000088
.quad 0x0000000080008009
.quad 0x000000008000000a
.quad 0x000000008000808b
.quad 0x800000000000008b
.quad 0x8000000000008089
.quad 0x8000000000008003
.quad 0x8000000000008002
.quad 0x8000000000000080
.quad 0x000000000000800a
.quad 0x800000008000000a
.quad 0x8000000080008081
.quad 0x8000000000008080
.quad 0x0000000080000001
.quad 0x8000000080008008
.size iotas,.-iotas
/* NUL-terminated ASCII banner: "Keccak-1600 absorb and squeeze for x86_64, CRYPTOGAMS by ..." */
.byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0

View File

@ -6,11 +6,22 @@
.globl md5_block_asm_data_order
.type md5_block_asm_data_order,@function
md5_block_asm_data_order:
.cfi_startproc
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-40
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-48
.Lprologue:
@ -44,8 +55,8 @@ md5_block_asm_data_order:
xorl %ecx,%r11d
leal -680876936(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 4(%rsi),%r10d
xorl %edx,%r11d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
@ -53,8 +64,8 @@ md5_block_asm_data_order:
xorl %ebx,%r11d
leal -389564586(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 8(%rsi),%r10d
xorl %ecx,%r11d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
@ -62,8 +73,8 @@ md5_block_asm_data_order:
xorl %eax,%r11d
leal 606105819(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 12(%rsi),%r10d
xorl %ebx,%r11d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
@ -71,8 +82,8 @@ md5_block_asm_data_order:
xorl %edx,%r11d
leal -1044525330(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
movl 16(%rsi),%r10d
xorl %eax,%r11d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
@ -80,8 +91,8 @@ md5_block_asm_data_order:
xorl %ecx,%r11d
leal -176418897(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 20(%rsi),%r10d
xorl %edx,%r11d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
@ -89,8 +100,8 @@ md5_block_asm_data_order:
xorl %ebx,%r11d
leal 1200080426(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 24(%rsi),%r10d
xorl %ecx,%r11d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
@ -98,8 +109,8 @@ md5_block_asm_data_order:
xorl %eax,%r11d
leal -1473231341(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 28(%rsi),%r10d
xorl %ebx,%r11d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
@ -107,8 +118,8 @@ md5_block_asm_data_order:
xorl %edx,%r11d
leal -45705983(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
movl 32(%rsi),%r10d
xorl %eax,%r11d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
@ -116,8 +127,8 @@ md5_block_asm_data_order:
xorl %ecx,%r11d
leal 1770035416(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 36(%rsi),%r10d
xorl %edx,%r11d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
@ -125,8 +136,8 @@ md5_block_asm_data_order:
xorl %ebx,%r11d
leal -1958414417(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 40(%rsi),%r10d
xorl %ecx,%r11d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
@ -134,8 +145,8 @@ md5_block_asm_data_order:
xorl %eax,%r11d
leal -42063(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 44(%rsi),%r10d
xorl %ebx,%r11d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
@ -143,8 +154,8 @@ md5_block_asm_data_order:
xorl %edx,%r11d
leal -1990404162(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
movl 48(%rsi),%r10d
xorl %eax,%r11d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
@ -152,8 +163,8 @@ md5_block_asm_data_order:
xorl %ecx,%r11d
leal 1804603682(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 52(%rsi),%r10d
xorl %edx,%r11d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
@ -161,8 +172,8 @@ md5_block_asm_data_order:
xorl %ebx,%r11d
leal -40341101(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 56(%rsi),%r10d
xorl %ecx,%r11d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
@ -170,8 +181,8 @@ md5_block_asm_data_order:
xorl %eax,%r11d
leal -1502002290(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 60(%rsi),%r10d
xorl %ebx,%r11d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
@ -179,18 +190,17 @@ md5_block_asm_data_order:
xorl %edx,%r11d
leal 1236535329(%rbx,%r10,1),%ebx
andl %ecx,%r11d
movl 4(%rsi),%r10d
xorl %eax,%r11d
movl 0(%rsi),%r10d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
movl 4(%rsi),%r10d
movl %edx,%r11d
movl %edx,%r12d
notl %r11d
leal -165796510(%rax,%r10,1),%eax
andl %ebx,%r12d
leal -165796510(%rax,%r10,1),%eax
andl %ecx,%r11d
movl 24(%rsi),%r10d
orl %r11d,%r12d
@ -200,8 +210,8 @@ md5_block_asm_data_order:
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal -1069501632(%rdx,%r10,1),%edx
andl %eax,%r12d
leal -1069501632(%rdx,%r10,1),%edx
andl %ebx,%r11d
movl 44(%rsi),%r10d
orl %r11d,%r12d
@ -211,8 +221,8 @@ md5_block_asm_data_order:
roll $9,%edx
addl %eax,%edx
notl %r11d
leal 643717713(%rcx,%r10,1),%ecx
andl %edx,%r12d
leal 643717713(%rcx,%r10,1),%ecx
andl %eax,%r11d
movl 0(%rsi),%r10d
orl %r11d,%r12d
@ -222,8 +232,8 @@ md5_block_asm_data_order:
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal -373897302(%rbx,%r10,1),%ebx
andl %ecx,%r12d
leal -373897302(%rbx,%r10,1),%ebx
andl %edx,%r11d
movl 20(%rsi),%r10d
orl %r11d,%r12d
@ -233,8 +243,8 @@ md5_block_asm_data_order:
roll $20,%ebx
addl %ecx,%ebx
notl %r11d
leal -701558691(%rax,%r10,1),%eax
andl %ebx,%r12d
leal -701558691(%rax,%r10,1),%eax
andl %ecx,%r11d
movl 40(%rsi),%r10d
orl %r11d,%r12d
@ -244,8 +254,8 @@ md5_block_asm_data_order:
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal 38016083(%rdx,%r10,1),%edx
andl %eax,%r12d
leal 38016083(%rdx,%r10,1),%edx
andl %ebx,%r11d
movl 60(%rsi),%r10d
orl %r11d,%r12d
@ -255,8 +265,8 @@ md5_block_asm_data_order:
roll $9,%edx
addl %eax,%edx
notl %r11d
leal -660478335(%rcx,%r10,1),%ecx
andl %edx,%r12d
leal -660478335(%rcx,%r10,1),%ecx
andl %eax,%r11d
movl 16(%rsi),%r10d
orl %r11d,%r12d
@ -266,8 +276,8 @@ md5_block_asm_data_order:
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal -405537848(%rbx,%r10,1),%ebx
andl %ecx,%r12d
leal -405537848(%rbx,%r10,1),%ebx
andl %edx,%r11d
movl 36(%rsi),%r10d
orl %r11d,%r12d
@ -277,8 +287,8 @@ md5_block_asm_data_order:
roll $20,%ebx
addl %ecx,%ebx
notl %r11d
leal 568446438(%rax,%r10,1),%eax
andl %ebx,%r12d
leal 568446438(%rax,%r10,1),%eax
andl %ecx,%r11d
movl 56(%rsi),%r10d
orl %r11d,%r12d
@ -288,8 +298,8 @@ md5_block_asm_data_order:
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal -1019803690(%rdx,%r10,1),%edx
andl %eax,%r12d
leal -1019803690(%rdx,%r10,1),%edx
andl %ebx,%r11d
movl 12(%rsi),%r10d
orl %r11d,%r12d
@ -299,8 +309,8 @@ md5_block_asm_data_order:
roll $9,%edx
addl %eax,%edx
notl %r11d
leal -187363961(%rcx,%r10,1),%ecx
andl %edx,%r12d
leal -187363961(%rcx,%r10,1),%ecx
andl %eax,%r11d
movl 32(%rsi),%r10d
orl %r11d,%r12d
@ -310,8 +320,8 @@ md5_block_asm_data_order:
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal 1163531501(%rbx,%r10,1),%ebx
andl %ecx,%r12d
leal 1163531501(%rbx,%r10,1),%ebx
andl %edx,%r11d
movl 52(%rsi),%r10d
orl %r11d,%r12d
@ -321,8 +331,8 @@ md5_block_asm_data_order:
roll $20,%ebx
addl %ecx,%ebx
notl %r11d
leal -1444681467(%rax,%r10,1),%eax
andl %ebx,%r12d
leal -1444681467(%rax,%r10,1),%eax
andl %ecx,%r11d
movl 8(%rsi),%r10d
orl %r11d,%r12d
@ -332,8 +342,8 @@ md5_block_asm_data_order:
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal -51403784(%rdx,%r10,1),%edx
andl %eax,%r12d
leal -51403784(%rdx,%r10,1),%edx
andl %ebx,%r11d
movl 28(%rsi),%r10d
orl %r11d,%r12d
@ -343,8 +353,8 @@ md5_block_asm_data_order:
roll $9,%edx
addl %eax,%edx
notl %r11d
leal 1735328473(%rcx,%r10,1),%ecx
andl %edx,%r12d
leal 1735328473(%rcx,%r10,1),%ecx
andl %eax,%r11d
movl 48(%rsi),%r10d
orl %r11d,%r12d
@ -354,289 +364,287 @@ md5_block_asm_data_order:
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal -1926607734(%rbx,%r10,1),%ebx
andl %ecx,%r12d
leal -1926607734(%rbx,%r10,1),%ebx
andl %edx,%r11d
movl 0(%rsi),%r10d
movl 20(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
movl 20(%rsi),%r10d
movl %ecx,%r11d
leal -378558(%rax,%r10,1),%eax
movl 32(%rsi),%r10d
xorl %edx,%r11d
movl 32(%rsi),%r10d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
roll $4,%eax
addl %ebx,%eax
leal -2022574463(%rdx,%r10,1),%edx
movl 44(%rsi),%r10d
xorl %ecx,%r11d
movl 44(%rsi),%r10d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal 1839030562(%rcx,%r10,1),%ecx
movl 56(%rsi),%r10d
xorl %ebx,%r11d
movl 56(%rsi),%r10d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
roll $16,%ecx
addl %edx,%ecx
leal -35309556(%rbx,%r10,1),%ebx
movl 4(%rsi),%r10d
xorl %eax,%r11d
movl 4(%rsi),%r10d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
leal -1530992060(%rax,%r10,1),%eax
movl 16(%rsi),%r10d
xorl %edx,%r11d
movl 16(%rsi),%r10d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
roll $4,%eax
addl %ebx,%eax
leal 1272893353(%rdx,%r10,1),%edx
movl 28(%rsi),%r10d
xorl %ecx,%r11d
movl 28(%rsi),%r10d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal -155497632(%rcx,%r10,1),%ecx
movl 40(%rsi),%r10d
xorl %ebx,%r11d
movl 40(%rsi),%r10d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
roll $16,%ecx
addl %edx,%ecx
leal -1094730640(%rbx,%r10,1),%ebx
movl 52(%rsi),%r10d
xorl %eax,%r11d
movl 52(%rsi),%r10d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
leal 681279174(%rax,%r10,1),%eax
movl 0(%rsi),%r10d
xorl %edx,%r11d
movl 0(%rsi),%r10d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
roll $4,%eax
addl %ebx,%eax
leal -358537222(%rdx,%r10,1),%edx
movl 12(%rsi),%r10d
xorl %ecx,%r11d
movl 12(%rsi),%r10d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal -722521979(%rcx,%r10,1),%ecx
movl 24(%rsi),%r10d
xorl %ebx,%r11d
movl 24(%rsi),%r10d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
roll $16,%ecx
addl %edx,%ecx
leal 76029189(%rbx,%r10,1),%ebx
movl 36(%rsi),%r10d
xorl %eax,%r11d
movl 36(%rsi),%r10d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
leal -640364487(%rax,%r10,1),%eax
movl 48(%rsi),%r10d
xorl %edx,%r11d
movl 48(%rsi),%r10d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
roll $4,%eax
addl %ebx,%eax
leal -421815835(%rdx,%r10,1),%edx
movl 60(%rsi),%r10d
xorl %ecx,%r11d
movl 60(%rsi),%r10d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal 530742520(%rcx,%r10,1),%ecx
movl 8(%rsi),%r10d
xorl %ebx,%r11d
movl 8(%rsi),%r10d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
roll $16,%ecx
addl %edx,%ecx
leal -995338651(%rbx,%r10,1),%ebx
movl 0(%rsi),%r10d
xorl %eax,%r11d
movl 0(%rsi),%r10d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
movl 0(%rsi),%r10d
movl $0xffffffff,%r11d
xorl %edx,%r11d
leal -198630844(%rax,%r10,1),%eax
orl %ebx,%r11d
movl 28(%rsi),%r10d
xorl %ecx,%r11d
addl %r11d,%eax
movl 28(%rsi),%r10d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal 1126891415(%rdx,%r10,1),%edx
orl %eax,%r11d
movl 56(%rsi),%r10d
xorl %ebx,%r11d
addl %r11d,%edx
movl 56(%rsi),%r10d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal -1416354905(%rcx,%r10,1),%ecx
orl %edx,%r11d
movl 20(%rsi),%r10d
xorl %eax,%r11d
addl %r11d,%ecx
movl 20(%rsi),%r10d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal -57434055(%rbx,%r10,1),%ebx
orl %ecx,%r11d
movl 48(%rsi),%r10d
xorl %edx,%r11d
addl %r11d,%ebx
movl 48(%rsi),%r10d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
leal 1700485571(%rax,%r10,1),%eax
orl %ebx,%r11d
movl 12(%rsi),%r10d
xorl %ecx,%r11d
addl %r11d,%eax
movl 12(%rsi),%r10d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal -1894986606(%rdx,%r10,1),%edx
orl %eax,%r11d
movl 40(%rsi),%r10d
xorl %ebx,%r11d
addl %r11d,%edx
movl 40(%rsi),%r10d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal -1051523(%rcx,%r10,1),%ecx
orl %edx,%r11d
movl 4(%rsi),%r10d
xorl %eax,%r11d
addl %r11d,%ecx
movl 4(%rsi),%r10d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal -2054922799(%rbx,%r10,1),%ebx
orl %ecx,%r11d
movl 32(%rsi),%r10d
xorl %edx,%r11d
addl %r11d,%ebx
movl 32(%rsi),%r10d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
leal 1873313359(%rax,%r10,1),%eax
orl %ebx,%r11d
movl 60(%rsi),%r10d
xorl %ecx,%r11d
addl %r11d,%eax
movl 60(%rsi),%r10d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal -30611744(%rdx,%r10,1),%edx
orl %eax,%r11d
movl 24(%rsi),%r10d
xorl %ebx,%r11d
addl %r11d,%edx
movl 24(%rsi),%r10d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal -1560198380(%rcx,%r10,1),%ecx
orl %edx,%r11d
movl 52(%rsi),%r10d
xorl %eax,%r11d
addl %r11d,%ecx
movl 52(%rsi),%r10d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal 1309151649(%rbx,%r10,1),%ebx
orl %ecx,%r11d
movl 16(%rsi),%r10d
xorl %edx,%r11d
addl %r11d,%ebx
movl 16(%rsi),%r10d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
leal -145523070(%rax,%r10,1),%eax
orl %ebx,%r11d
movl 44(%rsi),%r10d
xorl %ecx,%r11d
addl %r11d,%eax
movl 44(%rsi),%r10d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal -1120210379(%rdx,%r10,1),%edx
orl %eax,%r11d
movl 8(%rsi),%r10d
xorl %ebx,%r11d
addl %r11d,%edx
movl 8(%rsi),%r10d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal 718787259(%rcx,%r10,1),%ecx
orl %edx,%r11d
movl 36(%rsi),%r10d
xorl %eax,%r11d
addl %r11d,%ecx
movl 36(%rsi),%r10d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal -343485551(%rbx,%r10,1),%ebx
orl %ecx,%r11d
movl 0(%rsi),%r10d
xorl %edx,%r11d
addl %r11d,%ebx
movl 0(%rsi),%r10d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
@ -660,11 +668,18 @@ md5_block_asm_data_order:
movl %edx,12(%rbp)
movq (%rsp),%r15
.cfi_restore %r15
movq 8(%rsp),%r14
.cfi_restore %r14
movq 16(%rsp),%r12
.cfi_restore %r12
movq 24(%rsp),%rbx
.cfi_restore %rbx
movq 32(%rsp),%rbp
.cfi_restore %rbp
addq $40,%rsp
.cfi_adjust_cfa_offset -40
.Lepilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size md5_block_asm_data_order,.-md5_block_asm_data_order

File diff suppressed because it is too large Load Diff

View File

@ -6,15 +6,29 @@
.globl rc4_md5_enc
.type rc4_md5_enc,@function
rc4_md5_enc:
.cfi_startproc
cmpq $0,%r9
je .Labort
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $40,%rsp
.cfi_adjust_cfa_offset 40
.Lbody:
movq %rcx,%r11
movq %r9,%r12
@ -1249,13 +1263,21 @@ rc4_md5_enc:
movl %ecx,-4(%rdi)
movq 40(%rsp),%r15
.cfi_restore %r15
movq 48(%rsp),%r14
.cfi_restore %r14
movq 56(%rsp),%r13
.cfi_restore %r13
movq 64(%rsp),%r12
.cfi_restore %r12
movq 72(%rsp),%rbp
.cfi_restore %rbp
movq 80(%rsp),%rbx
.cfi_restore %rbx
leaq 88(%rsp),%rsp
.cfi_adjust_cfa_offset -88
.Lepilogue:
.Labort:
.byte 0xf3,0xc3
.cfi_endproc
.size rc4_md5_enc,.-rc4_md5_enc

View File

@ -10,9 +10,16 @@ RC4: orq %rsi,%rsi
jne .Lentry
.byte 0xf3,0xc3
.Lentry:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-24
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-32
.Lprologue:
movq %rsi,%r11
movq %rdx,%r12
@ -513,16 +520,21 @@ RC4: orq %rsi,%rsi
movl %ecx,-4(%rdi)
movq (%rsp),%r13
.cfi_restore %r13
movq 8(%rsp),%r12
.cfi_restore %r12
movq 16(%rsp),%rbx
.cfi_restore %rbx
addq $24,%rsp
.cfi_adjust_cfa_offset -24
.Lepilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size RC4,.-RC4
.globl private_RC4_set_key
.type private_RC4_set_key,@function
.globl RC4_set_key
.type RC4_set_key,@function
.align 16
private_RC4_set_key:
RC4_set_key:
leaq 8(%rdi),%rdi
leaq (%rdx,%rsi,1),%rdx
negq %rsi
@ -589,7 +601,7 @@ private_RC4_set_key:
movl %eax,-8(%rdi)
movl %eax,-4(%rdi)
.byte 0xf3,0xc3
.size private_RC4_set_key,.-private_RC4_set_key
.size RC4_set_key,.-RC4_set_key
.globl RC4_options
.type RC4_options,@function

View File

@ -6,15 +6,24 @@
.type rsaz_1024_sqr_avx2,@function
.align 64
rsaz_1024_sqr_avx2:
.cfi_startproc
leaq (%rsp),%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
vzeroupper
movq %rax,%rbp
.cfi_def_cfa_register %rbp
movq %rdx,%r13
subq $832,%rsp
movq %r13,%r15
@ -627,28 +636,46 @@ rsaz_1024_sqr_avx2:
vzeroall
movq %rbp,%rax
.cfi_def_cfa_register %rax
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lsqr_1024_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2
.globl rsaz_1024_mul_avx2
.type rsaz_1024_mul_avx2,@function
.align 64
rsaz_1024_mul_avx2:
.cfi_startproc
leaq (%rsp),%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
movq %rax,%rbp
.cfi_def_cfa_register %rbp
vzeroall
movq %rdx,%r13
subq $64,%rsp
@ -1164,15 +1191,24 @@ rsaz_1024_mul_avx2:
vzeroupper
movq %rbp,%rax
.cfi_def_cfa_register %rax
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lmul_1024_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size rsaz_1024_mul_avx2,.-rsaz_1024_mul_avx2
.globl rsaz_1024_red2norm_avx2
.type rsaz_1024_red2norm_avx2,@function
@ -1557,8 +1593,10 @@ rsaz_1024_scatter5_avx2:
.type rsaz_1024_gather5_avx2,@function
.align 32
rsaz_1024_gather5_avx2:
.cfi_startproc
vzeroupper
movq %rsp,%r11
.cfi_def_cfa_register %r11
leaq -256(%rsp),%rsp
andq $-32,%rsp
leaq .Linc(%rip),%r10
@ -1667,7 +1705,10 @@ rsaz_1024_gather5_avx2:
vmovdqu %ymm0,(%rdi)
vzeroupper
leaq (%r11),%rsp
.cfi_def_cfa_register %rsp
.byte 0xf3,0xc3
.cfi_endproc
.LSEH_end_rsaz_1024_gather5:
.size rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2
.globl rsaz_avx2_eligible

View File

@ -8,14 +8,28 @@
.type rsaz_512_sqr,@function
.align 32
rsaz_512_sqr:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $128+24,%rsp
.cfi_adjust_cfa_offset 128+24
.Lsqr_body:
movq %rdx,%rbp
movq (%rsi),%rdx
@ -660,28 +674,51 @@ rsaz_512_sqr:
.Lsqr_tail:
leaq 128+24+48(%rsp),%rax
.cfi_def_cfa %rax,8
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lsqr_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size rsaz_512_sqr,.-rsaz_512_sqr
.globl rsaz_512_mul
.type rsaz_512_mul,@function
.align 32
rsaz_512_mul:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $128+24,%rsp
.cfi_adjust_cfa_offset 128+24
.Lmul_body:
.byte 102,72,15,110,199
.byte 102,72,15,110,201
@ -743,28 +780,51 @@ rsaz_512_mul:
call __rsaz_512_subtract
leaq 128+24+48(%rsp),%rax
.cfi_def_cfa %rax,8
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lmul_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size rsaz_512_mul,.-rsaz_512_mul
.globl rsaz_512_mul_gather4
.type rsaz_512_mul_gather4,@function
.align 32
rsaz_512_mul_gather4:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $152,%rsp
.cfi_adjust_cfa_offset 152
.Lmul_gather4_body:
movd %r9d,%xmm8
movdqa .Linc+16(%rip),%xmm1
@ -1153,29 +1213,52 @@ rsaz_512_mul_gather4:
call __rsaz_512_subtract
leaq 128+24+48(%rsp),%rax
.cfi_def_cfa %rax,8
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lmul_gather4_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size rsaz_512_mul_gather4,.-rsaz_512_mul_gather4
.globl rsaz_512_mul_scatter4
.type rsaz_512_mul_scatter4,@function
.align 32
rsaz_512_mul_scatter4:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
movl %r9d,%r9d
subq $128+24,%rsp
.cfi_adjust_cfa_offset 128+24
.Lmul_scatter4_body:
leaq (%r8,%r9,8),%r8
.byte 102,72,15,110,199
@ -1250,28 +1333,51 @@ rsaz_512_mul_scatter4:
movq %r15,896(%rsi)
leaq 128+24+48(%rsp),%rax
.cfi_def_cfa %rax,8
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lmul_scatter4_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4
.globl rsaz_512_mul_by_one
.type rsaz_512_mul_by_one,@function
.align 32
rsaz_512_mul_by_one:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $128+24,%rsp
.cfi_adjust_cfa_offset 128+24
.Lmul_by_one_body:
movl OPENSSL_ia32cap_P+8(%rip),%eax
movq %rdx,%rbp
@ -1314,15 +1420,24 @@ rsaz_512_mul_by_one:
movq %r15,56(%rdi)
leaq 128+24+48(%rsp),%rax
.cfi_def_cfa %rax,8
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lmul_by_one_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size rsaz_512_mul_by_one,.-rsaz_512_mul_by_one
.type __rsaz_512_reduce,@function
.align 32

View File

@ -8,17 +8,22 @@
.type sha1_multi_block,@function
.align 32
/*
 * sha1_multi_block - entry and frame set-up (the body follows .Lbody).
 *
 * Dispatch: the 64 bits at OPENSSL_ia32cap_P+4 are loaded into %rcx;
 * bit 61 selects the _shaext_shortcut path and bit 28 of the low dword
 * (268435456 == 1<<28) selects the _avx_shortcut path. Otherwise fall through.
 *
 * Frame: %rax keeps the entry %rsp, %rbx and %rbp are pushed, then %rsp is
 * lowered by 288 and rounded down to a 256-byte boundary. The entry %rsp is
 * stored at 272(%rsp) so the epilogue and the unwinder can find it.
 */
sha1_multi_block:
.cfi_startproc
movq OPENSSL_ia32cap_P+4(%rip),%rcx
btq $61,%rcx
jc _shaext_shortcut
testl $268435456,%ecx
jnz _avx_shortcut
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
/* Was ".cfi_offset %rbx,-24": the register pushed into this slot is %rbp. */
.cfi_offset %rbp,-24
subq $288,%rsp
andq $-256,%rsp
movq %rax,272(%rsp)
/* DW_CFA_def_cfa_expression: CFA = *(%rsp + 272) + 8 */
.cfi_escape 0x0f,0x06,0x77,0x90,0x02,0x06,0x23,0x08
.Lbody:
leaq K_XX_XX(%rip),%rbp
leaq 256(%rsp),%rbx
@ -2548,19 +2553,28 @@ sha1_multi_block:
.Ldone:
movq 272(%rsp),%rax
.cfi_def_cfa %rax,8
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size sha1_multi_block,.-sha1_multi_block
/*
 * sha1_multi_block_shaext -- reached through the _shaext_shortcut label.
 *
 * Only the prologue and the epilogue are visible here; the body is elided
 * at the "@ ..." hunk marker below.
 * Frame: the entry %rsp is kept in %rax, %rbx and %rbp are pushed, 288 bytes
 * are reserved and the stack is realigned to 256 bytes.
 */
.type sha1_multi_block_shaext,@function
.align 32
sha1_multi_block_shaext:
.cfi_startproc
_shaext_shortcut:
/* keep the entry %rsp in %rax; the CFA is expressed through %rax from here on */
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
subq $288,%rsp
/* double the value in %edx */
shll $1,%edx
/* realign the frame down to a 256-byte boundary */
andq $-256,%rsp
@ -2916,14 +2930,19 @@ _shaext_shortcut:
.Ldone_shaext:
/* NOTE(review): %rax is expected to hold the entry %rsp here; the reload is not visible in this excerpt */
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
/* drop the whole frame in one step; the CFA is tracked via %rsp again */
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue_shaext:
/* 0xf3,0xc3 is the two-byte "rep ret" encoding */
.byte 0xf3,0xc3
.cfi_endproc
.size sha1_multi_block_shaext,.-sha1_multi_block_shaext
/*
 * sha1_multi_block_avx -- reached through the _avx_shortcut label.
 *
 * Only the entry, the frame setup at .Lavx and the epilogue are visible
 * here; the remaining code is elided at the "@ ..." hunk markers below.
 * Frame: %rbx and %rbp are saved, the stack is realigned to 256 bytes and the
 * entry %rsp is stashed at 272(%rsp) for the epilogue and the unwinder.
 */
.type sha1_multi_block_avx,@function
.align 32
sha1_multi_block_avx:
.cfi_startproc
_avx_shortcut:
/* bring the upper dword of %rcx down into the low half */
shrq $32,%rcx
cmpl $2,%edx
@ -2934,11 +2953,15 @@ _avx_shortcut:
.align 32
.Lavx:
/* keep the entry %rsp in %rax; the CFA is expressed through %rax from here on */
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
subq $288,%rsp
/* realign the frame down to a 256-byte boundary */
andq $-256,%rsp
/* remember the entry %rsp inside the aligned frame */
movq %rax,272(%rsp)
/* DW_CFA_def_cfa_expression: CFA = *(%rsp + 272) + 8 */
.cfi_escape 0x0f,0x06,0x77,0x90,0x02,0x06,0x23,0x08
.Lbody_avx:
leaq K_XX_XX(%rip),%rbp
leaq 256(%rsp),%rbx
@ -4988,27 +5011,41 @@ _avx_shortcut:
.Ldone_avx:
/* fetch the entry %rsp saved by the prologue */
movq 272(%rsp),%rax
.cfi_def_cfa %rax,8
/* clear the upper halves of the %ymm registers before returning */
vzeroupper
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
/* drop the whole frame in one step; the CFA is tracked via %rsp again */
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue_avx:
/* 0xf3,0xc3 is the two-byte "rep ret" encoding */
.byte 0xf3,0xc3
.cfi_endproc
.size sha1_multi_block_avx,.-sha1_multi_block_avx
/*
 * sha1_multi_block_avx2 -- reached through the _avx2_shortcut label.
 *
 * Only the prologue and the epilogue are visible here; the body is elided
 * at the "@ ..." hunk marker below.
 * Frame: %rbx, %rbp and %r12-%r15 are saved, 576 bytes are reserved, the
 * stack is realigned to 256 bytes and the entry %rsp is stashed at
 * 544(%rsp) for the epilogue and the unwinder.
 */
.type sha1_multi_block_avx2,@function
.align 32
sha1_multi_block_avx2:
.cfi_startproc
_avx2_shortcut:
/* keep the entry %rsp in %rax; the CFA is expressed through %rax from here on */
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
subq $576,%rsp
/* realign the frame down to a 256-byte boundary */
andq $-256,%rsp
/* remember the entry %rsp inside the aligned frame */
movq %rax,544(%rsp)
/* DW_CFA_def_cfa_expression: CFA = *(%rsp + 544) + 8 */
.cfi_escape 0x0f,0x06,0x77,0xa0,0x04,0x06,0x23,0x08
.Lbody_avx2:
leaq K_XX_XX(%rip),%rbp
/* halve the value in %edx */
shrl $1,%edx
@ -7195,16 +7232,25 @@ _avx2_shortcut:
.Ldone_avx2:
/* fetch the entry %rsp saved by the prologue */
movq 544(%rsp),%rax
.cfi_def_cfa %rax,8
/* clear the upper halves of the %ymm registers before returning */
vzeroupper
/* reload callee-saved registers in reverse push order */
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
/* drop the whole frame in one step; the CFA is tracked via %rsp again */
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue_avx2:
/* 0xf3,0xc3 is the two-byte "rep ret" encoding */
.byte 0xf3,0xc3
.cfi_endproc
.size sha1_multi_block_avx2,.-sha1_multi_block_avx2
.align 256

View File

@ -7,6 +7,7 @@
/*
 * sha1_block_data_order -- entry point and the code path under .Lialu.
 *
 * Only the entry, the .Lialu prologue and the epilogue are visible here;
 * the dispatch logic and the round code are elided at the "@ ..." hunk
 * markers below.
 * Frame: %rbx, %rbp and %r12-%r14 are saved, 72 bytes are reserved, the
 * stack is realigned to 64 bytes and the entry %rsp is stashed at 64(%rsp)
 * for the epilogue and the unwinder.
 */
.type sha1_block_data_order,@function
.align 16
sha1_block_data_order:
.cfi_startproc
/* load the dwords at offsets 0, 4 and 8 of OPENSSL_ia32cap_P; their use is in the elided code */
movl OPENSSL_ia32cap_P+0(%rip),%r9d
movl OPENSSL_ia32cap_P+4(%rip),%r8d
movl OPENSSL_ia32cap_P+8(%rip),%r10d
@ -27,17 +28,24 @@ sha1_block_data_order:
.align 16
.Lialu:
/* keep the entry %rsp in %rax; the CFA is expressed through %rax from here on */
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
/* copy %rdi, %rsi and %rdx into %r8, %r9 and %r10, interleaved with the frame setup */
movq %rdi,%r8
subq $72,%rsp
movq %rsi,%r9
/* realign the frame down to a 64-byte boundary */
andq $-64,%rsp
movq %rdx,%r10
/* remember the entry %rsp inside the aligned frame */
movq %rax,64(%rsp)
/* DW_CFA_def_cfa_expression: CFA = *(%rsp + 64) + 8 */
.cfi_escape 0x0f,0x06,0x77,0xc0,0x00,0x06,0x23,0x08
.Lprologue:
movl 0(%r8),%esi
@ -1232,19 +1240,28 @@ sha1_block_data_order:
jnz .Lloop
/* fetch the entry %rsp saved by the prologue */
movq 64(%rsp),%rsi
.cfi_def_cfa %rsi,8
/* reload callee-saved registers in reverse push order */
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
/* drop the whole frame in one step; the CFA is tracked via %rsp again */
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue:
/* 0xf3,0xc3 is the two-byte "rep ret" encoding */
.byte 0xf3,0xc3
.cfi_endproc
.size sha1_block_data_order,.-sha1_block_data_order
/*
 * sha1_block_data_order_shaext -- reached through the _shaext_shortcut label.
 *
 * Only the first and last few instructions are visible here; the body is
 * elided at the "@ ..." hunk marker below.
 * The visible code saves no registers and does not adjust %rsp. It loads
 * 16 bytes from (%rdi) and 4 bytes from 16(%rdi) on entry and stores the
 * same two locations back before returning.
 */
.type sha1_block_data_order_shaext,@function
.align 32
sha1_block_data_order_shaext:
_shaext_shortcut:
.cfi_startproc
movdqu (%rdi),%xmm0
movd 16(%rdi),%xmm1
movdqa K_XX_XX+160(%rip),%xmm3
@ -1406,20 +1423,27 @@ _shaext_shortcut:
pshufd $27,%xmm1,%xmm1
movdqu %xmm0,(%rdi)
movd %xmm1,16(%rdi)
/* 0xf3,0xc3 is the two-byte "rep ret" encoding */
.byte 0xf3,0xc3
/*
 * Fixed: the CFI region now ends after the return instruction, as in the
 * other functions of this file, so the return itself is covered by
 * unwind information.
 */
.cfi_endproc
.size sha1_block_data_order_shaext,.-sha1_block_data_order_shaext
.type sha1_block_data_order_ssse3,@function
.align 16
sha1_block_data_order_ssse3:
_ssse3_shortcut:
movq %rsp,%rax
.cfi_startproc
movq %rsp,%r11
.cfi_def_cfa_register %r11
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
leaq -64(%rsp),%rsp
movq %rax,%r14
andq $-64,%rsp
movq %rdi,%r8
movq %rsi,%r9
@ -1427,7 +1451,7 @@ _ssse3_shortcut:
shlq $6,%r10
addq %r9,%r10
leaq K_XX_XX+64(%rip),%r11
leaq K_XX_XX+64(%rip),%r14
movl 0(%r8),%eax
movl 4(%r8),%ebx
@ -1439,8 +1463,8 @@ _ssse3_shortcut:
xorl %edx,%edi
andl %edi,%esi
movdqa 64(%r11),%xmm6
movdqa -64(%r11),%xmm9
movdqa 64(%r14),%xmm6
movdqa -64(%r14),%xmm9
movdqu 0(%r9),%xmm0
movdqu 16(%r9),%xmm1
movdqu 32(%r9),%xmm2
@ -1516,7 +1540,7 @@ _ssse3_shortcut:
pslld $2,%xmm9
pxor %xmm10,%xmm4
xorl %ebp,%edx
movdqa -64(%r11),%xmm10
movdqa -64(%r14),%xmm10
roll $5,%ecx
addl %edi,%ebx
andl %edx,%esi
@ -1577,7 +1601,7 @@ _ssse3_shortcut:
pslld $2,%xmm10
pxor %xmm8,%xmm5
xorl %eax,%ebp
movdqa -32(%r11),%xmm8
movdqa -32(%r14),%xmm8
roll $5,%edx
addl %edi,%ecx
andl %ebp,%esi
@ -1638,7 +1662,7 @@ _ssse3_shortcut:
pslld $2,%xmm8
pxor %xmm9,%xmm6
xorl %ebx,%eax
movdqa -32(%r11),%xmm9
movdqa -32(%r14),%xmm9
roll $5,%ebp
addl %edi,%edx
andl %eax,%esi
@ -1699,7 +1723,7 @@ _ssse3_shortcut:
pslld $2,%xmm9
pxor %xmm10,%xmm7
xorl %ecx,%ebx
movdqa -32(%r11),%xmm10
movdqa -32(%r14),%xmm10
roll $5,%eax
addl %edi,%ebp
andl %ebx,%esi
@ -1810,7 +1834,7 @@ _ssse3_shortcut:
pxor %xmm3,%xmm2
addl %esi,%eax
xorl %edx,%edi
movdqa 0(%r11),%xmm10
movdqa 0(%r14),%xmm10
rorl $7,%ecx
paddd %xmm1,%xmm9
addl %ebx,%eax
@ -2045,7 +2069,7 @@ _ssse3_shortcut:
pxor %xmm0,%xmm7
roll $5,%ebx
addl %esi,%eax
movdqa 32(%r11),%xmm9
movdqa 32(%r14),%xmm9
xorl %ecx,%edi
paddd %xmm6,%xmm8
xorl %edx,%ecx
@ -2336,8 +2360,8 @@ _ssse3_shortcut:
addl %edx,%ecx
cmpq %r10,%r9
je .Ldone_ssse3
movdqa 64(%r11),%xmm6
movdqa -64(%r11),%xmm9
movdqa 64(%r14),%xmm6
movdqa -64(%r14),%xmm9
movdqu 0(%r9),%xmm0
movdqu 16(%r9),%xmm1
movdqu 32(%r9),%xmm2
@ -2574,29 +2598,41 @@ _ssse3_shortcut:
movl %ecx,8(%r8)
movl %edx,12(%r8)
movl %ebp,16(%r8)
leaq (%r14),%rsi
movq -40(%rsi),%r14
movq -32(%rsi),%r13
movq -24(%rsi),%r12
movq -16(%rsi),%rbp
movq -8(%rsi),%rbx
leaq (%rsi),%rsp
movq -40(%r11),%r14
.cfi_restore %r14
movq -32(%r11),%r13
.cfi_restore %r13
movq -24(%r11),%r12
.cfi_restore %r12
movq -16(%r11),%rbp
.cfi_restore %rbp
movq -8(%r11),%rbx
.cfi_restore %rbx
leaq (%r11),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue_ssse3:
.byte 0xf3,0xc3
.cfi_endproc
.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3
.type sha1_block_data_order_avx,@function
.align 16
sha1_block_data_order_avx:
_avx_shortcut:
movq %rsp,%rax
.cfi_startproc
movq %rsp,%r11
.cfi_def_cfa_register %r11
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
leaq -64(%rsp),%rsp
vzeroupper
movq %rax,%r14
andq $-64,%rsp
movq %rdi,%r8
movq %rsi,%r9
@ -2604,7 +2640,7 @@ _avx_shortcut:
shlq $6,%r10
addq %r9,%r10
leaq K_XX_XX+64(%rip),%r11
leaq K_XX_XX+64(%rip),%r14
movl 0(%r8),%eax
movl 4(%r8),%ebx
@ -2616,8 +2652,8 @@ _avx_shortcut:
xorl %edx,%edi
andl %edi,%esi
vmovdqa 64(%r11),%xmm6
vmovdqa -64(%r11),%xmm11
vmovdqa 64(%r14),%xmm6
vmovdqa -64(%r14),%xmm11
vmovdqu 0(%r9),%xmm0
vmovdqu 16(%r9),%xmm1
vmovdqu 32(%r9),%xmm2
@ -2742,7 +2778,7 @@ _avx_shortcut:
vpxor %xmm10,%xmm5,%xmm5
xorl %eax,%ebp
shldl $5,%edx,%edx
vmovdqa -32(%r11),%xmm11
vmovdqa -32(%r14),%xmm11
addl %edi,%ecx
andl %ebp,%esi
xorl %eax,%ebp
@ -2955,7 +2991,7 @@ _avx_shortcut:
addl %esi,%eax
xorl %edx,%edi
vpaddd %xmm1,%xmm11,%xmm9
vmovdqa 0(%r11),%xmm11
vmovdqa 0(%r14),%xmm11
shrdl $7,%ecx,%ecx
addl %ebx,%eax
vpxor %xmm8,%xmm2,%xmm2
@ -3174,7 +3210,7 @@ _avx_shortcut:
movl %ebx,%edi
xorl %edx,%esi
vpaddd %xmm6,%xmm11,%xmm9
vmovdqa 32(%r11),%xmm11
vmovdqa 32(%r14),%xmm11
shldl $5,%ebx,%ebx
addl %esi,%eax
vpxor %xmm8,%xmm7,%xmm7
@ -3453,8 +3489,8 @@ _avx_shortcut:
addl %edx,%ecx
cmpq %r10,%r9
je .Ldone_avx
vmovdqa 64(%r11),%xmm6
vmovdqa -64(%r11),%xmm11
vmovdqa 64(%r14),%xmm6
vmovdqa -64(%r14),%xmm11
vmovdqu 0(%r9),%xmm0
vmovdqu 16(%r9),%xmm1
vmovdqu 32(%r9),%xmm2
@ -3690,28 +3726,40 @@ _avx_shortcut:
movl %ecx,8(%r8)
movl %edx,12(%r8)
movl %ebp,16(%r8)
leaq (%r14),%rsi
movq -40(%rsi),%r14
movq -32(%rsi),%r13
movq -24(%rsi),%r12
movq -16(%rsi),%rbp
movq -8(%rsi),%rbx
leaq (%rsi),%rsp
movq -40(%r11),%r14
.cfi_restore %r14
movq -32(%r11),%r13
.cfi_restore %r13
movq -24(%r11),%r12
.cfi_restore %r12
movq -16(%r11),%rbp
.cfi_restore %rbp
movq -8(%r11),%rbx
.cfi_restore %rbx
leaq (%r11),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue_avx:
.byte 0xf3,0xc3
.cfi_endproc
.size sha1_block_data_order_avx,.-sha1_block_data_order_avx
.type sha1_block_data_order_avx2,@function
.align 16
sha1_block_data_order_avx2:
_avx2_shortcut:
movq %rsp,%rax
.cfi_startproc
movq %rsp,%r11
.cfi_def_cfa_register %r11
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
vzeroupper
movq %rax,%r14
movq %rdi,%r8
movq %rsi,%r9
movq %rdx,%r10
@ -3721,7 +3769,7 @@ _avx2_shortcut:
leaq 64(%r9),%r13
andq $-128,%rsp
addq %r9,%r10
leaq K_XX_XX+64(%rip),%r11
leaq K_XX_XX+64(%rip),%r14
movl 0(%r8),%eax
cmpq %r10,%r13
@ -3730,7 +3778,7 @@ _avx2_shortcut:
movl 8(%r8),%ecx
movl 12(%r8),%edx
movl 16(%r8),%esi
vmovdqu 64(%r11),%ymm6
vmovdqu 64(%r14),%ymm6
vmovdqu (%r9),%xmm0
vmovdqu 16(%r9),%xmm1
@ -3744,7 +3792,7 @@ _avx2_shortcut:
vpshufb %ymm6,%ymm1,%ymm1
vinserti128 $1,48(%r13),%ymm3,%ymm3
vpshufb %ymm6,%ymm2,%ymm2
vmovdqu -64(%r11),%ymm11
vmovdqu -64(%r14),%ymm11
vpshufb %ymm6,%ymm3,%ymm3
vpaddd %ymm11,%ymm0,%ymm4
@ -3776,7 +3824,7 @@ _avx2_shortcut:
vpxor %ymm3,%ymm8,%ymm8
vpxor %ymm8,%ymm5,%ymm5
vpsrld $31,%ymm5,%ymm8
vmovdqu -32(%r11),%ymm11
vmovdqu -32(%r14),%ymm11
vpslldq $12,%ymm5,%ymm10
vpaddd %ymm5,%ymm5,%ymm5
vpsrld $30,%ymm10,%ymm9
@ -3930,7 +3978,7 @@ _avx2_shortcut:
addl -56(%r13),%ebp
andnl %esi,%ebx,%edi
vpxor %ymm3,%ymm2,%ymm2
vmovdqu 0(%r11),%ymm11
vmovdqu 0(%r14),%ymm11
addl %ecx,%ebp
rorxl $27,%ebx,%r12d
rorxl $2,%ebx,%ecx
@ -4161,7 +4209,7 @@ _avx2_shortcut:
addl -116(%r13),%eax
leal (%rax,%rbx,1),%eax
vpxor %ymm0,%ymm7,%ymm7
vmovdqu 32(%r11),%ymm11
vmovdqu 32(%r14),%ymm11
rorxl $27,%ebp,%r12d
rorxl $2,%ebp,%ebx
xorl %ecx,%ebp
@ -4606,7 +4654,7 @@ _avx2_shortcut:
cmpq %r10,%r9
je .Ldone_avx2
vmovdqu 64(%r11),%ymm6
vmovdqu 64(%r14),%ymm6
cmpq %r10,%rdi
ja .Last_avx2
@ -4822,7 +4870,7 @@ _avx2_shortcut:
xorl %ebx,%eax
addl %r12d,%esi
xorl %ecx,%eax
vmovdqu -64(%r11),%ymm11
vmovdqu -64(%r14),%ymm11
vpshufb %ymm6,%ymm0,%ymm0
addl 68(%r13),%edx
leal (%rdx,%rax,1),%edx
@ -5178,7 +5226,7 @@ _avx2_shortcut:
xorl %ebp,%esi
addl %r12d,%edx
vpsrld $31,%ymm5,%ymm8
vmovdqu -32(%r11),%ymm11
vmovdqu -32(%r14),%ymm11
xorl %ebx,%esi
addl 104(%r13),%ecx
leal (%rcx,%rsi,1),%ecx
@ -5371,15 +5419,21 @@ _avx2_shortcut:
.Ldone_avx2:
vzeroupper
leaq (%r14),%rsi
movq -40(%rsi),%r14
movq -32(%rsi),%r13
movq -24(%rsi),%r12
movq -16(%rsi),%rbp
movq -8(%rsi),%rbx
leaq (%rsi),%rsp
movq -40(%r11),%r14
.cfi_restore %r14
movq -32(%r11),%r13
.cfi_restore %r13
movq -24(%r11),%r12
.cfi_restore %r12
movq -16(%r11),%rbp
.cfi_restore %rbp
movq -8(%r11),%rbx
.cfi_restore %rbx
leaq (%r11),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue_avx2:
.byte 0xf3,0xc3
.cfi_endproc
.size sha1_block_data_order_avx2,.-sha1_block_data_order_avx2
.align 64
K_XX_XX:

View File

@ -8,17 +8,22 @@
/*
 * sha256_multi_block -- entry point and fall-through code path.
 *
 * Only the prologue and the epilogue are visible here; the round code is
 * elided at the "@ ..." hunk marker below.
 *
 * Entry: tests two bits of the qword at OPENSSL_ia32cap_P+4 and branches to
 * the _shaext_shortcut or _avx_shortcut entry points when they are set;
 * otherwise falls through to the code below.
 * Frame: %rbx and %rbp are saved, the stack is realigned to 256 bytes and the
 * entry %rsp is stashed at 272(%rsp) for the epilogue and the unwinder.
 */
.type sha256_multi_block,@function
.align 32
sha256_multi_block:
.cfi_startproc
movq OPENSSL_ia32cap_P+4(%rip),%rcx
/* bit 61 of the loaded qword selects the _shaext_shortcut path */
btq $61,%rcx
jc _shaext_shortcut
/* 268435456 = 0x10000000: bit 28 of the low dword selects the _avx_shortcut path */
testl $268435456,%ecx
jnz _avx_shortcut
/* keep the entry %rsp in %rax; the CFA is expressed through %rax from here on */
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
subq $288,%rsp
/* realign the frame down to a 256-byte boundary */
andq $-256,%rsp
/* remember the entry %rsp inside the aligned frame */
movq %rax,272(%rsp)
/* DW_CFA_def_cfa_expression: CFA = *(%rsp + 272) + 8 */
.cfi_escape 0x0f,0x06,0x77,0x90,0x02,0x06,0x23,0x08
.Lbody:
/* %rbp points 128 bytes into the K256 table */
leaq K256+128(%rip),%rbp
leaq 256(%rsp),%rbx
@ -2617,19 +2622,28 @@ sha256_multi_block:
.Ldone:
/* fetch the entry %rsp saved by the prologue */
movq 272(%rsp),%rax
.cfi_def_cfa %rax,8
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
/* drop the whole frame in one step; the CFA is tracked via %rsp again */
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue:
/* 0xf3,0xc3 is the two-byte "rep ret" encoding */
.byte 0xf3,0xc3
.cfi_endproc
.size sha256_multi_block,.-sha256_multi_block
/*
 * sha256_multi_block_shaext -- reached through the _shaext_shortcut label.
 *
 * Only the prologue and the epilogue are visible here; the body is elided
 * at the "@ ..." hunk marker below.
 * Frame: the entry %rsp is kept in %rax, %rbx and %rbp are pushed, 288 bytes
 * are reserved and the stack is realigned to 256 bytes.
 */
.type sha256_multi_block_shaext,@function
.align 32
sha256_multi_block_shaext:
.cfi_startproc
_shaext_shortcut:
/* keep the entry %rsp in %rax; the CFA is expressed through %rax from here on */
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
subq $288,%rsp
/* double the value in %edx */
shll $1,%edx
/* realign the frame down to a 256-byte boundary */
andq $-256,%rsp
@ -3104,14 +3118,19 @@ _shaext_shortcut:
.Ldone_shaext:
/* NOTE(review): %rax is expected to hold the entry %rsp here; the reload is not visible in this excerpt */
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
/* drop the whole frame in one step; the CFA is tracked via %rsp again */
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue_shaext:
/* 0xf3,0xc3 is the two-byte "rep ret" encoding */
.byte 0xf3,0xc3
.cfi_endproc
.size sha256_multi_block_shaext,.-sha256_multi_block_shaext
/*
 * sha256_multi_block_avx -- reached through the _avx_shortcut label.
 *
 * Only the entry, the frame setup at .Lavx and the epilogue are visible
 * here; the remaining code is elided at the "@ ..." hunk markers below.
 * Frame: %rbx and %rbp are saved, the stack is realigned to 256 bytes and the
 * entry %rsp is stashed at 272(%rsp) for the epilogue and the unwinder.
 */
.type sha256_multi_block_avx,@function
.align 32
sha256_multi_block_avx:
.cfi_startproc
_avx_shortcut:
/* bring the upper dword of %rcx down into the low half */
shrq $32,%rcx
cmpl $2,%edx
@ -3122,11 +3141,15 @@ _avx_shortcut:
.align 32
.Lavx:
/* keep the entry %rsp in %rax; the CFA is expressed through %rax from here on */
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
subq $288,%rsp
/* realign the frame down to a 256-byte boundary */
andq $-256,%rsp
/* remember the entry %rsp inside the aligned frame */
movq %rax,272(%rsp)
/* DW_CFA_def_cfa_expression: CFA = *(%rsp + 272) + 8 */
.cfi_escape 0x0f,0x06,0x77,0x90,0x02,0x06,0x23,0x08
.Lbody_avx:
/* %rbp points 128 bytes into the K256 table */
leaq K256+128(%rip),%rbp
leaq 256(%rsp),%rbx
@ -5355,27 +5378,41 @@ _avx_shortcut:
.Ldone_avx:
/* fetch the entry %rsp saved by the prologue */
movq 272(%rsp),%rax
.cfi_def_cfa %rax,8
/* clear the upper halves of the %ymm registers before returning */
vzeroupper
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
/* drop the whole frame in one step; the CFA is tracked via %rsp again */
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue_avx:
/* 0xf3,0xc3 is the two-byte "rep ret" encoding */
.byte 0xf3,0xc3
.cfi_endproc
.size sha256_multi_block_avx,.-sha256_multi_block_avx
/*
 * sha256_multi_block_avx2 -- reached through the _avx2_shortcut label.
 *
 * Only the prologue and the epilogue are visible here; the body is elided
 * at the "@ ..." hunk marker below.
 * Frame: %rbx, %rbp and %r12-%r15 are saved, 576 bytes are reserved, the
 * stack is realigned to 256 bytes and the entry %rsp is stashed at
 * 544(%rsp) for the epilogue and the unwinder.
 */
.type sha256_multi_block_avx2,@function
.align 32
sha256_multi_block_avx2:
.cfi_startproc
_avx2_shortcut:
/* keep the entry %rsp in %rax; the CFA is expressed through %rax from here on */
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
subq $576,%rsp
/* realign the frame down to a 256-byte boundary */
andq $-256,%rsp
/* remember the entry %rsp inside the aligned frame */
movq %rax,544(%rsp)
/* DW_CFA_def_cfa_expression: CFA = *(%rsp + 544) + 8 */
.cfi_escape 0x0f,0x06,0x77,0xa0,0x04,0x06,0x23,0x08
.Lbody_avx2:
/* %rbp points 128 bytes into the K256 table */
leaq K256+128(%rip),%rbp
/* advance %rdi by 128 bytes */
leaq 128(%rdi),%rdi
@ -7740,16 +7777,25 @@ _avx2_shortcut:
.Ldone_avx2:
/* fetch the entry %rsp saved by the prologue */
movq 544(%rsp),%rax
.cfi_def_cfa %rax,8
/* clear the upper halves of the %ymm registers before returning */
vzeroupper
/* reload callee-saved registers in reverse push order */
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
/* drop the whole frame in one step; the CFA is tracked via %rsp again */
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue_avx2:
/* 0xf3,0xc3 is the two-byte "rep ret" encoding */
.byte 0xf3,0xc3
.cfi_endproc
.size sha256_multi_block_avx2,.-sha256_multi_block_avx2
.align 256
K256:

View File

@ -7,6 +7,7 @@
.type sha256_block_data_order,@function
.align 16
sha256_block_data_order:
.cfi_startproc
leaq OPENSSL_ia32cap_P(%rip),%r11
movl 0(%r11),%r9d
movl 4(%r11),%r10d
@ -23,13 +24,20 @@ sha256_block_data_order:
je .Lavx_shortcut
testl $512,%r10d
jnz .Lssse3_shortcut
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
movq %rsp,%r11
.cfi_offset %r15,-56
shlq $4,%rdx
subq $64+32,%rsp
leaq (%rsi,%rdx,4),%rdx
@ -37,7 +45,8 @@ sha256_block_data_order:
movq %rdi,64+0(%rsp)
movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp)
movq %r11,64+24(%rsp)
movq %rax,88(%rsp)
.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
.Lprologue:
movl 0(%rdi),%eax
@ -1701,16 +1710,25 @@ sha256_block_data_order:
movl %r11d,28(%rdi)
jb .Lloop
movq 64+24(%rsp),%rsi
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
movq 88(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size sha256_block_data_order,.-sha256_block_data_order
.align 64
.type K256,@object
@ -1965,14 +1983,22 @@ _shaext_shortcut:
.type sha256_block_data_order_ssse3,@function
.align 64
sha256_block_data_order_ssse3:
.cfi_startproc
.Lssse3_shortcut:
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
movq %rsp,%r11
.cfi_offset %r15,-56
shlq $4,%rdx
subq $96,%rsp
leaq (%rsi,%rdx,4),%rdx
@ -1980,7 +2006,8 @@ sha256_block_data_order_ssse3:
movq %rdi,64+0(%rsp)
movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp)
movq %r11,64+24(%rsp)
movq %rax,88(%rsp)
.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
.Lprologue_ssse3:
movl 0(%rdi),%eax
@ -3046,28 +3073,45 @@ sha256_block_data_order_ssse3:
movl %r11d,28(%rdi)
jb .Lloop_ssse3
movq 64+24(%rsp),%rsi
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
movq 88(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue_ssse3:
.byte 0xf3,0xc3
.cfi_endproc
.size sha256_block_data_order_ssse3,.-sha256_block_data_order_ssse3
.type sha256_block_data_order_avx,@function
.align 64
sha256_block_data_order_avx:
.cfi_startproc
.Lavx_shortcut:
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
movq %rsp,%r11
.cfi_offset %r15,-56
shlq $4,%rdx
subq $96,%rsp
leaq (%rsi,%rdx,4),%rdx
@ -3075,7 +3119,8 @@ sha256_block_data_order_avx:
movq %rdi,64+0(%rsp)
movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp)
movq %r11,64+24(%rsp)
movq %rax,88(%rsp)
.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
.Lprologue_avx:
vzeroupper
@ -4102,29 +4147,46 @@ sha256_block_data_order_avx:
movl %r11d,28(%rdi)
jb .Lloop_avx
movq 64+24(%rsp),%rsi
movq 88(%rsp),%rsi
.cfi_def_cfa %rsi,8
vzeroupper
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue_avx:
.byte 0xf3,0xc3
.cfi_endproc
.size sha256_block_data_order_avx,.-sha256_block_data_order_avx
.type sha256_block_data_order_avx2,@function
.align 64
sha256_block_data_order_avx2:
.cfi_startproc
.Lavx2_shortcut:
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
movq %rsp,%r11
.cfi_offset %r15,-56
subq $544,%rsp
shlq $4,%rdx
andq $-1024,%rsp
@ -4133,7 +4195,8 @@ sha256_block_data_order_avx2:
movq %rdi,64+0(%rsp)
movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp)
movq %r11,64+24(%rsp)
movq %rax,88(%rsp)
.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
.Lprologue_avx2:
vzeroupper
@ -5346,15 +5409,24 @@ sha256_block_data_order_avx2:
.Ldone_avx2:
leaq (%rbp),%rsp
movq 64+24(%rsp),%rsi
movq 88(%rsp),%rsi
.cfi_def_cfa %rsi,8
vzeroupper
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue_avx2:
.byte 0xf3,0xc3
.cfi_endproc
.size sha256_block_data_order_avx2,.-sha256_block_data_order_avx2

View File

@ -7,6 +7,7 @@
.type sha512_block_data_order,@function
.align 16
sha512_block_data_order:
.cfi_startproc
leaq OPENSSL_ia32cap_P(%rip),%r11
movl 0(%r11),%r9d
movl 4(%r11),%r10d
@ -21,13 +22,20 @@ sha512_block_data_order:
orl %r9d,%r10d
cmpl $1342177792,%r10d
je .Lavx_shortcut
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
movq %rsp,%r11
.cfi_offset %r15,-56
shlq $4,%rdx
subq $128+32,%rsp
leaq (%rsi,%rdx,8),%rdx
@ -35,7 +43,8 @@ sha512_block_data_order:
movq %rdi,128+0(%rsp)
movq %rsi,128+8(%rsp)
movq %rdx,128+16(%rsp)
movq %r11,128+24(%rsp)
movq %rax,152(%rsp)
.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08
.Lprologue:
movq 0(%rdi),%rax
@ -1699,16 +1708,25 @@ sha512_block_data_order:
movq %r11,56(%rdi)
jb .Lloop
movq 128+24(%rsp),%rsi
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
movq 152(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size sha512_block_data_order,.-sha512_block_data_order
.align 64
.type K512,@object
@ -1800,14 +1818,22 @@ K512:
.type sha512_block_data_order_xop,@function
.align 64
sha512_block_data_order_xop:
.cfi_startproc
.Lxop_shortcut:
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
movq %rsp,%r11
.cfi_offset %r15,-56
shlq $4,%rdx
subq $160,%rsp
leaq (%rsi,%rdx,8),%rdx
@ -1815,7 +1841,8 @@ sha512_block_data_order_xop:
movq %rdi,128+0(%rsp)
movq %rsi,128+8(%rsp)
movq %rdx,128+16(%rsp)
movq %r11,128+24(%rsp)
movq %rax,152(%rsp)
.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08
.Lprologue_xop:
vzeroupper
@ -2868,29 +2895,46 @@ sha512_block_data_order_xop:
movq %r11,56(%rdi)
jb .Lloop_xop
movq 128+24(%rsp),%rsi
movq 152(%rsp),%rsi
.cfi_def_cfa %rsi,8
vzeroupper
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue_xop:
.byte 0xf3,0xc3
.cfi_endproc
.size sha512_block_data_order_xop,.-sha512_block_data_order_xop
.type sha512_block_data_order_avx,@function
.align 64
sha512_block_data_order_avx:
.cfi_startproc
.Lavx_shortcut:
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
movq %rsp,%r11
.cfi_offset %r15,-56
shlq $4,%rdx
subq $160,%rsp
leaq (%rsi,%rdx,8),%rdx
@ -2898,7 +2942,8 @@ sha512_block_data_order_avx:
movq %rdi,128+0(%rsp)
movq %rsi,128+8(%rsp)
movq %rdx,128+16(%rsp)
movq %r11,128+24(%rsp)
movq %rax,152(%rsp)
.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08
.Lprologue_avx:
vzeroupper
@ -4015,29 +4060,46 @@ sha512_block_data_order_avx:
movq %r11,56(%rdi)
jb .Lloop_avx
movq 128+24(%rsp),%rsi
movq 152(%rsp),%rsi
.cfi_def_cfa %rsi,8
vzeroupper
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue_avx:
.byte 0xf3,0xc3
.cfi_endproc
.size sha512_block_data_order_avx,.-sha512_block_data_order_avx
.type sha512_block_data_order_avx2,@function
.align 64
sha512_block_data_order_avx2:
.cfi_startproc
.Lavx2_shortcut:
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
movq %rsp,%r11
.cfi_offset %r15,-56
subq $1312,%rsp
shlq $4,%rdx
andq $-2048,%rsp
@ -4046,7 +4108,8 @@ sha512_block_data_order_avx2:
movq %rdi,128+0(%rsp)
movq %rsi,128+8(%rsp)
movq %rdx,128+16(%rsp)
movq %r11,128+24(%rsp)
movq %rax,152(%rsp)
.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08
.Lprologue_avx2:
vzeroupper
@ -5353,15 +5416,24 @@ sha512_block_data_order_avx2:
.Ldone_avx2:
leaq (%rbp),%rsp
movq 128+24(%rsp),%rsi
movq 152(%rsp),%rsi
.cfi_def_cfa %rsi,8
vzeroupper
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue_avx2:
.byte 0xf3,0xc3
.cfi_endproc
.size sha512_block_data_order_avx2,.-sha512_block_data_order_avx2

View File

@ -6,14 +6,22 @@
.type whirlpool_block,@function
.align 16
whirlpool_block:
.cfi_startproc
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
movq %rsp,%r11
subq $128+40,%rsp
andq $-64,%rsp
@ -21,7 +29,8 @@ whirlpool_block:
movq %rdi,0(%r10)
movq %rsi,8(%r10)
movq %rdx,16(%r10)
movq %r11,32(%r10)
movq %rax,32(%r10)
.cfi_escape 0x0f,0x06,0x77,0xa0,0x01,0x06,0x23,0x08
.Lprologue:
movq %r10,%rbx
@ -581,15 +590,24 @@ whirlpool_block:
jmp .Louterloop
.Lalldone:
movq 32(%rbx),%rsi
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
.cfi_def_cfa %rsi,8
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size whirlpool_block,.-whirlpool_block
.align 64

View File

@ -0,0 +1,794 @@
/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from x25519-x86_64.pl. */
.text
/*
 * x25519_fe51_mul
 *
 * Multiplies two field elements that are each stored as five 64-bit limbs.
 *   %rdi = output (five qwords, written by the shared tail .Lreduce51)
 *   %rsi = first operand  f[0..4]
 *   %rdx = second operand g[0..4]
 *
 * The 25 partial products are summed into five 128-bit accumulators
 * (high:low) %rcx:%rbx, %r9:%r8, %r11:%r10, %r13:%r12 and %r15:%r14.
 * Products that would land above the fifth limb are folded back into the
 * lower accumulators by pre-multiplying the g[] limb by 19
 * (presumably because 2^255 = 19 modulo the curve25519 prime - confirm
 * against x25519-x86_64.pl).
 *
 * Stack frame: six callee-saved pushes plus 40 bytes of scratch:
 *   0(%rsp)=g[0], 8(%rsp)=g[1], 16(%rsp)=g[2], 32(%rsp)=saved %rdi.
 * This layout must match the epilogue in .Lreduce51, which restores the
 * registers and returns on this function's behalf.
 */
.globl x25519_fe51_mul
.type x25519_fe51_mul,@function
.align 32
x25519_fe51_mul:
.cfi_startproc
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
leaq -40(%rsp),%rsp
.cfi_adjust_cfa_offset 40
.Lfe51_mul_body:
/* Load f[0] and all of g[]; %rdx is about to be clobbered by mulq. */
movq 0(%rsi),%rax
movq 0(%rdx),%r11
movq 8(%rdx),%r12
movq 16(%rdx),%r13
movq 24(%rdx),%rbp
movq 32(%rdx),%r14
/* Output pointer is parked on the stack so %rdi can be used as scratch. */
movq %rdi,32(%rsp)
movq %rax,%rdi
/* f[0] * g[0..4] initialises the five accumulators. */
mulq %r11
movq %r11,0(%rsp)
movq %rax,%rbx
movq %rdi,%rax
movq %rdx,%rcx
mulq %r12
movq %r12,8(%rsp)
movq %rax,%r8
movq %rdi,%rax
/* %r15 = 9*g[4]; combined below into %rdi = g[4] + 2*9*g[4] = 19*g[4]. */
leaq (%r14,%r14,8),%r15
movq %rdx,%r9
mulq %r13
movq %r13,16(%rsp)
movq %rax,%r10
movq %rdi,%rax
leaq (%r14,%r15,2),%rdi
movq %rdx,%r11
mulq %rbp
movq %rax,%r12
movq 0(%rsi),%rax
movq %rdx,%r13
mulq %r14
movq %rax,%r14
movq 8(%rsi),%rax
movq %rdx,%r15
/* f[1..4] * 19*g[4] -> accumulators 0..3. */
mulq %rdi
addq %rax,%rbx
movq 16(%rsi),%rax
adcq %rdx,%rcx
mulq %rdi
addq %rax,%r8
movq 24(%rsi),%rax
adcq %rdx,%r9
mulq %rdi
addq %rax,%r10
movq 32(%rsi),%rax
adcq %rdx,%r11
mulq %rdi
/* %rdi = 19*g[3] (%rbp still holds g[3]). */
imulq $19,%rbp,%rdi
addq %rax,%r12
movq 8(%rsi),%rax
adcq %rdx,%r13
mulq %rbp
/* %rbp = g[2] from here on. */
movq 16(%rsp),%rbp
addq %rax,%r14
movq 16(%rsi),%rax
adcq %rdx,%r15
mulq %rdi
addq %rax,%rbx
movq 24(%rsi),%rax
adcq %rdx,%rcx
mulq %rdi
addq %rax,%r8
movq 32(%rsi),%rax
adcq %rdx,%r9
mulq %rdi
/* %rdi = 19*g[2]. */
imulq $19,%rbp,%rdi
addq %rax,%r10
movq 8(%rsi),%rax
adcq %rdx,%r11
mulq %rbp
addq %rax,%r12
movq 16(%rsi),%rax
adcq %rdx,%r13
mulq %rbp
/* %rbp = g[1] from here on. */
movq 8(%rsp),%rbp
addq %rax,%r14
movq 24(%rsi),%rax
adcq %rdx,%r15
mulq %rdi
addq %rax,%rbx
movq 32(%rsi),%rax
adcq %rdx,%rcx
mulq %rdi
addq %rax,%r8
movq 8(%rsi),%rax
adcq %rdx,%r9
mulq %rbp
/* %rdi = 19*g[1]. */
imulq $19,%rbp,%rdi
addq %rax,%r10
movq 16(%rsi),%rax
adcq %rdx,%r11
mulq %rbp
addq %rax,%r12
movq 24(%rsi),%rax
adcq %rdx,%r13
mulq %rbp
/* %rbp = g[0] from here on. */
movq 0(%rsp),%rbp
addq %rax,%r14
movq 32(%rsi),%rax
adcq %rdx,%r15
mulq %rdi
addq %rax,%rbx
movq 8(%rsi),%rax
adcq %rdx,%rcx
mulq %rbp
addq %rax,%r8
movq 16(%rsi),%rax
adcq %rdx,%r9
mulq %rbp
addq %rax,%r10
movq 24(%rsi),%rax
adcq %rdx,%r11
mulq %rbp
addq %rax,%r12
movq 32(%rsi),%rax
adcq %rdx,%r13
mulq %rbp
addq %rax,%r14
adcq %rdx,%r15
/* Reload the output pointer and hand over to the shared reduce/return tail. */
movq 32(%rsp),%rdi
jmp .Lreduce51
/* Not reached by fall-through: the return happens inside .Lreduce51. */
.Lfe51_mul_epilogue:
.cfi_endproc
.size x25519_fe51_mul,.-x25519_fe51_mul
/*
 * x25519_fe51_sqr
 *
 * Squares a field element stored as five 64-bit limbs.
 *   %rdi = output (five qwords)
 *   %rsi = input f[0..4]
 *
 * Cross terms are computed once against a doubled limb (2*f[i]); terms
 * that overflow the fifth limb are folded back with a factor of 19.
 * The result is accumulated in the same five 128-bit accumulators
 * (high:low) %rcx:%rbx, %r9:%r8, %r11:%r10, %r13:%r12, %r15:%r14 that
 * .Lreduce51 consumes.
 *
 * Stack frame: six callee-saved pushes plus 40 bytes of scratch
 *   0(%rsp)=f[2], 32(%rsp)=saved %rdi.
 *
 * .Lreduce51 (below) is a shared tail that x25519_fe51_mul and
 * x25519_fe51_mul121666 also jump to. It expects the frame layout above,
 * the five accumulators, and %rdi = output pointer.
 *
 * CFI fix: the directive after the final "leaq 88(%rsp),%rsp" was
 * ".cfi_adjust_cfa_offset 88". Releasing 88 bytes moves %rsp towards the
 * CFA, so the offset has to shrink by 88 (from 96 back to 8). The old
 * value left the unwinder with a CFA 176 bytes off at the return
 * instruction. This file is generated from x25519-x86_64.pl, so the same
 * change is needed in the generator.
 */
.globl x25519_fe51_sqr
.type x25519_fe51_sqr,@function
.align 32
x25519_fe51_sqr:
.cfi_startproc
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
leaq -40(%rsp),%rsp
.cfi_adjust_cfa_offset 40
.Lfe51_sqr_body:
movq 0(%rsi),%rax
movq 16(%rsi),%r15
movq 32(%rsi),%rbp
/* Output pointer is parked on the stack so %rdi can be used as scratch. */
movq %rdi,32(%rsp)
/* %r14 = 2*f[0], used for every cross term involving f[0]. */
leaq (%rax,%rax,1),%r14
mulq %rax
movq %rax,%rbx
movq 8(%rsi),%rax
movq %rdx,%rcx
mulq %r14
movq %rax,%r8
movq %r15,%rax
movq %r15,0(%rsp)
movq %rdx,%r9
mulq %r14
movq %rax,%r10
movq 24(%rsi),%rax
movq %rdx,%r11
/* %rdi = 19*f[4]. */
imulq $19,%rbp,%rdi
mulq %r14
movq %rax,%r12
movq %rbp,%rax
movq %rdx,%r13
mulq %r14
movq %rax,%r14
movq %rbp,%rax
movq %rdx,%r15
mulq %rdi
addq %rax,%r12
movq 8(%rsi),%rax
adcq %rdx,%r13
/* %rsi is repurposed: from here it holds f[3], not the input pointer. */
movq 24(%rsi),%rsi
/* %rbp = 2*f[1]. */
leaq (%rax,%rax,1),%rbp
mulq %rax
addq %rax,%r10
movq 0(%rsp),%rax
adcq %rdx,%r11
mulq %rbp
addq %rax,%r12
movq %rbp,%rax
adcq %rdx,%r13
mulq %rsi
addq %rax,%r14
movq %rbp,%rax
adcq %rdx,%r15
/* %rbp = 19*f[3]. */
imulq $19,%rsi,%rbp
mulq %rdi
addq %rax,%rbx
leaq (%rsi,%rsi,1),%rax
adcq %rdx,%rcx
mulq %rdi
addq %rax,%r10
movq %rsi,%rax
adcq %rdx,%r11
mulq %rbp
addq %rax,%r8
movq 0(%rsp),%rax
adcq %rdx,%r9
/* %rsi = 2*f[2]. */
leaq (%rax,%rax,1),%rsi
mulq %rax
addq %rax,%r14
movq %rbp,%rax
adcq %rdx,%r15
mulq %rsi
addq %rax,%rbx
movq %rsi,%rax
adcq %rdx,%rcx
mulq %rdi
addq %rax,%r8
adcq %rdx,%r9
movq 32(%rsp),%rdi
jmp .Lreduce51
.align 32
/*
 * Shared tail: carry-propagate the five 128-bit accumulators down to
 * 51-bit limbs, store them at 0..32(%rdi), restore callee-saved
 * registers and return.
 */
.Lreduce51:
/* %rbp = 2^51 - 1, the limb mask. */
movq $0x7ffffffffffff,%rbp
/* For each accumulator hi:lo, (hi << 13) | (lo >> 51) is the carry into
   the next limb and lo & mask is the limb that stays. */
movq %r10,%rdx
shrq $51,%r10
shlq $13,%r11
andq %rbp,%rdx
orq %r10,%r11
addq %r11,%r12
adcq $0,%r13
movq %rbx,%rax
shrq $51,%rbx
shlq $13,%rcx
andq %rbp,%rax
orq %rbx,%rcx
addq %rcx,%r8
adcq $0,%r9
movq %r12,%rbx
shrq $51,%r12
shlq $13,%r13
andq %rbp,%rbx
orq %r12,%r13
addq %r13,%r14
adcq $0,%r15
movq %r8,%rcx
shrq $51,%r8
shlq $13,%r9
andq %rbp,%rcx
orq %r8,%r9
addq %r9,%rdx
movq %r14,%r10
shrq $51,%r14
shlq $13,%r15
andq %rbp,%r10
orq %r14,%r15
/* Carry out of the top limb wraps to limb 0 multiplied by 19
   (computed as c + 2*(9*c)). */
leaq (%r15,%r15,8),%r14
leaq (%r15,%r14,2),%r15
addq %r15,%rax
/* One more partial carry step: limb 2 -> limb 3 and limb 0 -> limb 1. */
movq %rdx,%r8
andq %rbp,%rdx
shrq $51,%r8
addq %r8,%rbx
movq %rax,%r9
andq %rbp,%rax
shrq $51,%r9
addq %r9,%rcx
movq %rax,0(%rdi)
movq %rcx,8(%rdi)
movq %rdx,16(%rdi)
movq %rbx,24(%rdi)
movq %r10,32(%rdi)
movq 40(%rsp),%r15
.cfi_restore %r15
movq 48(%rsp),%r14
.cfi_restore %r14
movq 56(%rsp),%r13
.cfi_restore %r13
movq 64(%rsp),%r12
.cfi_restore %r12
movq 72(%rsp),%rbx
.cfi_restore %rbx
movq 80(%rsp),%rbp
.cfi_restore %rbp
leaq 88(%rsp),%rsp
/* 40 bytes of scratch + 6*8 bytes of saved registers released. */
.cfi_adjust_cfa_offset -88
.Lfe51_sqr_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size x25519_fe51_sqr,.-x25519_fe51_sqr
/*
 * x25519_fe51_mul121666
 *
 * Multiplies a five-limb field element by the constant 121666.
 *   %rdi = output (five qwords, written by .Lreduce51)
 *   %rsi = input f[0..4]
 *
 * Each limb product is placed in one of the 128-bit accumulators
 * (high:low) %rcx:%rbx, %r9:%r8, %r11:%r10, %r13:%r12, %r15:%r14, then
 * control jumps to the shared tail .Lreduce51 (defined inside
 * x25519_fe51_sqr), which reduces, stores, restores registers and returns.
 * The prologue builds the same frame as x25519_fe51_sqr (six pushes plus
 * 40 bytes) because that tail unwinds exactly that layout. %rdi is not
 * modified here, so it still holds the output pointer on entry to the
 * tail.
 */
.globl x25519_fe51_mul121666
.type x25519_fe51_mul121666,@function
.align 32
x25519_fe51_mul121666:
.cfi_startproc
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
leaq -40(%rsp),%rsp
.cfi_adjust_cfa_offset 40
.Lfe51_mul121666_body:
/* movl zero-extends into %rax; mulq leaves the 128-bit product in %rdx:%rax. */
movl $121666,%eax
mulq 0(%rsi)
movq %rax,%rbx
movl $121666,%eax
movq %rdx,%rcx
mulq 8(%rsi)
movq %rax,%r8
movl $121666,%eax
movq %rdx,%r9
mulq 16(%rsi)
movq %rax,%r10
movl $121666,%eax
movq %rdx,%r11
mulq 24(%rsi)
movq %rax,%r12
movl $121666,%eax
movq %rdx,%r13
mulq 32(%rsi)
movq %rax,%r14
movq %rdx,%r15
jmp .Lreduce51
/* Not reached by fall-through: the return happens inside .Lreduce51. */
.Lfe51_mul121666_epilogue:
.cfi_endproc
.size x25519_fe51_mul121666,.-x25519_fe51_mul121666
/*
 * x25519_fe64_eligible
 *
 * Reports whether both capability bits in mask 0x80100 are set in the
 * third dword of OPENSSL_ia32cap_P.
 *   Out: %eax = 0x80100 when both bits are set, 0 otherwise.
 *   Clobbers: %ecx, flags.
 * NOTE(review): the two bits are presumably the BMI2 and ADX feature
 * flags that the mulx/adcx/adox based fe64 routines rely on - confirm
 * against the OPENSSL_ia32cap documentation.
 */
.globl x25519_fe64_eligible
.type x25519_fe64_eligible,@function
.align 32
x25519_fe64_eligible:
movl OPENSSL_ia32cap_P+8(%rip),%eax
xorl %ecx,%ecx
/* Keep only the two bits of interest; the result is already the "yes" answer. */
andl $0x80100,%eax
cmpl $0x80100,%eax
/* Anything short of both bits collapses to 0. */
cmovnel %ecx,%eax
.byte 0xf3,0xc3
.size x25519_fe64_eligible,.-x25519_fe64_eligible
/*
 * x25519_fe64_mul
 *
 * Multiplies two field elements stored as four 64-bit limbs, using
 * mulx/adcx/adox (callers are expected to gate on x25519_fe64_eligible -
 * TODO confirm against the C caller).
 *   %rdi = output (four qwords, written by the shared tail .Lreduce64)
 *   %rsi = first operand  f[0..3]
 *   %rdx = second operand g[0..3]
 *
 * The full 8-limb product is built in %r8..%r15 (least significant in
 * %r8) with two interleaved carry chains: adcx uses CF, adox uses OF.
 * The code then loads 38 into %edx and jumps to .Lreduce64 (defined
 * inside x25519_fe64_sqr), which folds the upper four limbs into the
 * lower four, stores the result and returns.
 *
 * Stack frame: seven pushes (%rbp,%rbx,%r12-%r15,%rdi) plus 16 bytes:
 *   0(%rsp)=g[2], 16(%rsp)=saved %rdi (the pushed copy).
 */
.globl x25519_fe64_mul
.type x25519_fe64_mul,@function
.align 32
x25519_fe64_mul:
.cfi_startproc
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
pushq %rdi
.cfi_adjust_cfa_offset 8
.cfi_offset %rdi,-64
leaq -16(%rsp),%rsp
.cfi_adjust_cfa_offset 16
.Lfe64_mul_body:
/* mulx takes its implicit multiplicand from %rdx, so g[] moves to
   %rbp,%rcx,%r14,%r15 and %rdx is reloaded with f[i] for each row. */
movq %rdx,%rax
movq 0(%rdx),%rbp
movq 0(%rsi),%rdx
movq 8(%rax),%rcx
movq 16(%rax),%r14
movq 24(%rax),%r15
mulxq %rbp,%r8,%rax
/* %rdi = 0 (constant zero for carry flushes); xor also clears CF and OF
   so both carry chains start clean. */
xorl %edi,%edi
mulxq %rcx,%r9,%rbx
adcxq %rax,%r9
mulxq %r14,%r10,%rax
adcxq %rbx,%r10
mulxq %r15,%r11,%r12
/* Row 2: %rdx = f[1]. */
movq 8(%rsi),%rdx
adcxq %rax,%r11
/* g[2] is spilled because %r14 is later overwritten with a product limb. */
movq %r14,(%rsp)
adcxq %rdi,%r12
mulxq %rbp,%rax,%rbx
adoxq %rax,%r9
adcxq %rbx,%r10
mulxq %rcx,%rax,%rbx
adoxq %rax,%r10
adcxq %rbx,%r11
mulxq %r14,%rax,%rbx
adoxq %rax,%r11
adcxq %rbx,%r12
mulxq %r15,%rax,%r13
/* Row 3: %rdx = f[2]. */
movq 16(%rsi),%rdx
adoxq %rax,%r12
adcxq %rdi,%r13
adoxq %rdi,%r13
mulxq %rbp,%rax,%rbx
adcxq %rax,%r10
adoxq %rbx,%r11
mulxq %rcx,%rax,%rbx
adcxq %rax,%r11
adoxq %rbx,%r12
mulxq %r14,%rax,%rbx
adcxq %rax,%r12
adoxq %rbx,%r13
mulxq %r15,%rax,%r14
/* Row 4: %rdx = f[3]. */
movq 24(%rsi),%rdx
adcxq %rax,%r13
adoxq %rdi,%r14
adcxq %rdi,%r14
mulxq %rbp,%rax,%rbx
adoxq %rax,%r11
adcxq %rbx,%r12
mulxq %rcx,%rax,%rbx
adoxq %rax,%r12
adcxq %rbx,%r13
mulxq (%rsp),%rax,%rbx
adoxq %rax,%r13
adcxq %rbx,%r14
mulxq %r15,%rax,%r15
/* %edx = 38, the fold multiplier consumed by .Lreduce64. */
movl $38,%edx
adoxq %rax,%r14
adcxq %rdi,%r15
adoxq %rdi,%r15
jmp .Lreduce64
/* Not reached by fall-through: the return happens inside .Lreduce64. */
.Lfe64_mul_epilogue:
.cfi_endproc
.size x25519_fe64_mul,.-x25519_fe64_mul
/*
 * x25519_fe64_sqr
 *
 * Squares a field element stored as four 64-bit limbs, using
 * mulx/adcx/adox.
 *   %rdi = output (four qwords)
 *   %rsi = input f[0..3]
 *
 * The off-diagonal products are computed once, doubled with
 * "adcx r,r", and the diagonal squares are added on the adox chain.
 * The 8-limb result ends up in %r8..%r15 (least significant in %r8).
 *
 * Stack frame: seven pushes (%rbp,%rbx,%r12-%r15,%rdi) plus 16 bytes,
 * 72 bytes in total; 16(%rsp) holds the pushed %rdi.
 *
 * .Lreduce64 (below) is a shared tail that x25519_fe64_mul also jumps
 * to. It expects the 8-limb product in %r8..%r15, %rdx = 38, %rdi = 0,
 * and the frame layout above.
 *
 * CFI fix: the directive after the final "leaq 72(%rsp),%rsp" was
 * ".cfi_adjust_cfa_offset 88". The frame is 72 bytes and releasing it
 * moves %rsp towards the CFA, so the adjustment has to be -72 (CFA
 * offset 80 -> 8). This file is generated from x25519-x86_64.pl, so the
 * same change is needed in the generator.
 */
.globl x25519_fe64_sqr
.type x25519_fe64_sqr,@function
.align 32
x25519_fe64_sqr:
.cfi_startproc
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
pushq %rdi
.cfi_adjust_cfa_offset 8
.cfi_offset %rdi,-64
leaq -16(%rsp),%rsp
.cfi_adjust_cfa_offset 16
.Lfe64_sqr_body:
/* f[0..3] -> %rdx, %rcx, %rbp, %rsi (the input pointer is consumed). */
movq 0(%rsi),%rdx
movq 8(%rsi),%rcx
movq 16(%rsi),%rbp
movq 24(%rsi),%rsi
mulxq %rdx,%r8,%r15
mulxq %rcx,%r9,%rax
/* %rdi = 0 (constant zero for carry flushes); xor also clears CF and OF. */
xorl %edi,%edi
mulxq %rbp,%r10,%rbx
adcxq %rax,%r10
mulxq %rsi,%r11,%r12
movq %rcx,%rdx
adcxq %rbx,%r11
adcxq %rdi,%r12
mulxq %rbp,%rax,%rbx
adoxq %rax,%r11
adcxq %rbx,%r12
mulxq %rsi,%rax,%r13
movq %rbp,%rdx
adoxq %rax,%r12
adcxq %rdi,%r13
mulxq %rsi,%rax,%r14
movq %rcx,%rdx
adoxq %rax,%r13
adcxq %rdi,%r14
adoxq %rdi,%r14
/* Double the cross terms (CF chain) while adding the squares (OF chain). */
adcxq %r9,%r9
adoxq %r15,%r9
adcxq %r10,%r10
mulxq %rdx,%rax,%rbx
movq %rbp,%rdx
adcxq %r11,%r11
adoxq %rax,%r10
adcxq %r12,%r12
adoxq %rbx,%r11
mulxq %rdx,%rax,%rbx
movq %rsi,%rdx
adcxq %r13,%r13
adoxq %rax,%r12
adcxq %r14,%r14
adoxq %rbx,%r13
mulxq %rdx,%rax,%r15
/* %edx = 38, the fold multiplier consumed by .Lreduce64. */
movl $38,%edx
adoxq %rax,%r14
adcxq %rdi,%r15
adoxq %rdi,%r15
jmp .Lreduce64
.align 32
/*
 * Shared tail: fold the upper four limbs (%r12..%r15) times 38 into the
 * lower four (%r8..%r11), fold the resulting carries again, store the
 * four limbs at 0..24(%rdi), restore callee-saved registers and return.
 */
.Lreduce64:
mulxq %r12,%rax,%rbx
adcxq %rax,%r8
adoxq %rbx,%r9
mulxq %r13,%rax,%rbx
adcxq %rax,%r9
adoxq %rbx,%r10
mulxq %r14,%rax,%rbx
adcxq %rax,%r10
adoxq %rbx,%r11
mulxq %r15,%rax,%r12
adcxq %rax,%r11
/* %rdi is still zero here: these two just collect both carry flags. */
adoxq %rdi,%r12
adcxq %rdi,%r12
/* Reload the output pointer from the slot written by "pushq %rdi". */
movq 16(%rsp),%rdi
/* %r12 = overflow limb * 38, folded into limb 0. */
imulq %rdx,%r12
addq %r12,%r8
adcq $0,%r9
adcq $0,%r10
adcq $0,%r11
/* A final carry out becomes an all-ones mask, i.e. one more +38. */
sbbq %rax,%rax
andq $38,%rax
addq %rax,%r8
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r8,0(%rdi)
movq 24(%rsp),%r15
.cfi_restore %r15
movq 32(%rsp),%r14
.cfi_restore %r14
movq 40(%rsp),%r13
.cfi_restore %r13
movq 48(%rsp),%r12
.cfi_restore %r12
movq 56(%rsp),%rbx
.cfi_restore %rbx
movq 64(%rsp),%rbp
.cfi_restore %rbp
leaq 72(%rsp),%rsp
/* 16 bytes of scratch + 7*8 bytes of pushed registers released. */
.cfi_adjust_cfa_offset -72
.Lfe64_sqr_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size x25519_fe64_sqr,.-x25519_fe64_sqr
/*
 * x25519_fe64_mul121666
 *
 * Multiplies a four-limb field element by the constant 121666.
 *   %rdi = output (four qwords)
 *   %rsi = input f[0..3]
 * Leaf routine: no stack frame, uses only %rax, %rcx, %rdx, %r8-%r11.
 * The limb that overflows past the fourth word is multiplied by 38 and
 * added back into limb 0, and a final carry out is folded the same way.
 */
.globl x25519_fe64_mul121666
.type x25519_fe64_mul121666,@function
.align 32
x25519_fe64_mul121666:
.Lfe64_mul121666_body:
/* mulx multiplies by the implicit operand in %rdx and leaves flags alone,
   so the add/adc chain below can be interleaved with the multiplies. */
movl $121666,%edx
mulxq 0(%rsi),%r8,%rcx
mulxq 8(%rsi),%r9,%rax
addq %rcx,%r9
mulxq 16(%rsi),%r10,%rcx
adcq %rax,%r10
mulxq 24(%rsi),%r11,%rax
adcq %rcx,%r11
adcq $0,%rax
/* %rax = overflow limb; fold it in as overflow*38. */
imulq $38,%rax,%rax
addq %rax,%r8
adcq $0,%r9
adcq $0,%r10
adcq $0,%r11
/* Carry out of limb 3 -> all-ones mask -> one more +38 on limb 0. */
sbbq %rax,%rax
andq $38,%rax
addq %rax,%r8
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r8,0(%rdi)
.Lfe64_mul121666_epilogue:
.byte 0xf3,0xc3
.size x25519_fe64_mul121666,.-x25519_fe64_mul121666
/*
 * x25519_fe64_add
 *
 * Adds two field elements stored as four 64-bit limbs.
 *   %rdi = output (four qwords)
 *   %rsi = first operand, %rdx = second operand
 * Leaf routine: no stack frame, uses only %rax and %r8-%r11.
 * A carry out of the 256-bit sum is folded back in as +38 on limb 0; the
 * fold is applied twice because the first fold can itself carry out.
 * All loads complete before the first store, so the output may alias
 * either input.
 */
.globl x25519_fe64_add
.type x25519_fe64_add,@function
.align 32
x25519_fe64_add:
.Lfe64_add_body:
movq 0(%rsi),%r8
movq 8(%rsi),%r9
movq 16(%rsi),%r10
movq 24(%rsi),%r11
/* 256-bit add with carry propagation. */
addq 0(%rdx),%r8
adcq 8(%rdx),%r9
adcq 16(%rdx),%r10
adcq 24(%rdx),%r11
/* First fold: carry -> all-ones mask -> 38 or 0. */
sbbq %rax,%rax
andq $38,%rax
addq %rax,%r8
adcq $0,%r9
adcq $0,%r10
adcq $0,%r11
/* Second fold: only limb 0 can change now. */
sbbq %rax,%rax
andq $38,%rax
addq %rax,%r8
/* Write the result out in limb order. */
movq %r8,0(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
.Lfe64_add_epilogue:
.byte 0xf3,0xc3
.size x25519_fe64_add,.-x25519_fe64_add
/*
 * x25519_fe64_sub
 *
 * Subtracts two field elements stored as four 64-bit limbs
 * (first operand minus second operand).
 *   %rdi = output (four qwords)
 *   %rsi = first operand, %rdx = second operand
 * Leaf routine: no stack frame, uses only %rax and %r8-%r11.
 * A borrow out of the 256-bit difference is folded back in as -38 on
 * limb 0; the fold is applied twice because the first fold can itself
 * borrow. All loads complete before the first store, so the output may
 * alias either input.
 */
.globl x25519_fe64_sub
.type x25519_fe64_sub,@function
.align 32
x25519_fe64_sub:
.Lfe64_sub_body:
movq 0(%rsi),%r8
movq 8(%rsi),%r9
movq 16(%rsi),%r10
movq 24(%rsi),%r11
/* 256-bit subtract with borrow propagation. */
subq 0(%rdx),%r8
sbbq 8(%rdx),%r9
sbbq 16(%rdx),%r10
sbbq 24(%rdx),%r11
/* First fold: borrow -> all-ones mask -> 38 or 0. */
sbbq %rax,%rax
andq $38,%rax
subq %rax,%r8
sbbq $0,%r9
sbbq $0,%r10
sbbq $0,%r11
/* Second fold: only limb 0 can change now. */
sbbq %rax,%rax
andq $38,%rax
subq %rax,%r8
/* Write the result out in limb order. */
movq %r8,0(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
.Lfe64_sub_epilogue:
.byte 0xf3,0xc3
.size x25519_fe64_sub,.-x25519_fe64_sub
/*
 * x25519_fe64_tobytes
 *
 * Copies a four-limb field element from %rsi to %rdi after a branch-free
 * adjustment of the value around bit 255.
 *   %rdi = output (four qwords)
 *   %rsi = input  (four qwords)
 * Leaf routine: no stack frame, uses only %rax and %r8-%r11.
 * NOTE(review): this looks like the final canonical reduction modulo
 * 2^255-19 before serialisation - confirm against x25519-x86_64.pl.
 */
.globl x25519_fe64_tobytes
.type x25519_fe64_tobytes,@function
.align 32
x25519_fe64_tobytes:
.Lfe64_to_body:
movq 0(%rsi),%r8
movq 8(%rsi),%r9
movq 16(%rsi),%r10
movq 24(%rsi),%r11
/* %rax = top limb with bit 63 cleared (shift left by one, then right). */
leaq (%r11,%r11,1),%rax
/* %r11 = 38 if bit 63 of the top limb was set, otherwise 19. */
sarq $63,%r11
shrq $1,%rax
andq $19,%r11
addq $19,%r11
/* Add that constant to the 255-bit value, carrying into the top limb. */
addq %r11,%r8
adcq $0,%r9
adcq $0,%r10
adcq $0,%rax
/* %r11 = new top limb with bit 63 cleared. */
leaq (%rax,%rax,1),%r11
/* %rax = 19 if the addition did NOT set bit 63, otherwise 0. */
sarq $63,%rax
shrq $1,%r11
notq %rax
andq $19,%rax
/* Undo the +19 when it did not overflow into bit 63. */
subq %rax,%r8
sbbq $0,%r9
sbbq $0,%r10
sbbq $0,%r11
movq %r8,0(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
.Lfe64_to_epilogue:
.byte 0xf3,0xc3
.size x25519_fe64_tobytes,.-x25519_fe64_tobytes
.byte 88,50,53,53,49,57,32,112,114,105,109,105,116,105,118,101,115,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0

View File

@ -5,7 +5,9 @@
.type _mul_1x1,@function
.align 16
_mul_1x1:
.cfi_startproc
subq $128+8,%rsp
.cfi_adjust_cfa_offset 128+8
movq $-1,%r9
leaq (%rax,%rax,1),%rsi
shrq $3,%r9
@ -195,16 +197,20 @@ _mul_1x1:
xorq %rdi,%rdx
addq $128+8,%rsp
.cfi_adjust_cfa_offset -128-8
.byte 0xf3,0xc3
.Lend_mul_1x1:
.cfi_endproc
.size _mul_1x1,.-_mul_1x1
.globl bn_GF2m_mul_2x2
.type bn_GF2m_mul_2x2,@function
.align 16
bn_GF2m_mul_2x2:
movq OPENSSL_ia32cap_P(%rip),%rax
btq $33,%rax
.cfi_startproc
movq %rsp,%rax
movq OPENSSL_ia32cap_P(%rip),%r10
btq $33,%r10
jnc .Lvanilla_mul_2x2
.byte 102,72,15,110,198
@ -232,11 +238,17 @@ bn_GF2m_mul_2x2:
.align 16
.Lvanilla_mul_2x2:
leaq -136(%rsp),%rsp
.cfi_adjust_cfa_offset 8*17
movq %r14,80(%rsp)
.cfi_rel_offset %r14,8*10
movq %r13,88(%rsp)
.cfi_rel_offset %r13,8*11
movq %r12,96(%rsp)
.cfi_rel_offset %r12,8*12
movq %rbp,104(%rsp)
.cfi_rel_offset %rbp,8*13
movq %rbx,112(%rsp)
.cfi_rel_offset %rbx,8*14
.Lbody_mul_2x2:
movq %rdi,32(%rsp)
movq %rsi,40(%rsp)
@ -281,13 +293,21 @@ bn_GF2m_mul_2x2:
movq %rax,8(%rbp)
movq 80(%rsp),%r14
.cfi_restore %r14
movq 88(%rsp),%r13
.cfi_restore %r13
movq 96(%rsp),%r12
.cfi_restore %r12
movq 104(%rsp),%rbp
.cfi_restore %rbp
movq 112(%rsp),%rbx
.cfi_restore %rbx
leaq 136(%rsp),%rsp
.cfi_adjust_cfa_offset -8*17
.Lepilogue_mul_2x2:
.byte 0xf3,0xc3
.Lend_mul_2x2:
.cfi_endproc
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 16

View File

@ -8,8 +8,10 @@
.type bn_mul_mont,@function
.align 16
bn_mul_mont:
.cfi_startproc
movl %r9d,%r9d
movq %rsp,%rax
.cfi_def_cfa_register %rax
testl $3,%r9d
jnz .Lmul_enter
cmpl $8,%r9d
@ -24,11 +26,17 @@ bn_mul_mont:
.align 16
.Lmul_enter:
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
negq %r9
movq %rsp,%r11
@ -42,6 +50,8 @@ bn_mul_mont:
subq %r10,%r11
andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
@ -59,6 +69,7 @@ bn_mul_mont:
.Lmul_page_walk_done:
movq %rax,8(%rsp,%r9,8)
.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul_body:
movq %rdx,%r12
movq (%r8),%r8
@ -226,32 +237,49 @@ bn_mul_mont:
jnz .Lcopy
movq 8(%rsp,%r9,8),%rsi
.cfi_def_cfa %rsi,8
movq $1,%rax
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmul_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size bn_mul_mont,.-bn_mul_mont
.type bn_mul4x_mont,@function
.align 16
bn_mul4x_mont:
.cfi_startproc
movl %r9d,%r9d
movq %rsp,%rax
.cfi_def_cfa_register %rax
.Lmul4x_enter:
andl $0x80100,%r11d
cmpl $0x80100,%r11d
je .Lmulx4x_enter
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
negq %r9
movq %rsp,%r11
@ -275,6 +303,7 @@ bn_mul4x_mont:
.Lmul4x_page_walk_done:
movq %rax,8(%rsp,%r9,8)
.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul4x_body:
movq %rdi,16(%rsp,%r9,8)
movq %rdx,%r12
@ -642,16 +671,25 @@ bn_mul4x_mont:
decq %r15
jnz .Lcopy4x
movq 8(%rsp,%r9,8),%rsi
.cfi_def_cfa %rsi, 8
movq $1,%rax
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmul4x_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size bn_mul4x_mont,.-bn_mul4x_mont
@ -659,14 +697,22 @@ bn_mul4x_mont:
.type bn_sqr8x_mont,@function
.align 32
bn_sqr8x_mont:
.cfi_startproc
movq %rsp,%rax
.cfi_def_cfa_register %rax
.Lsqr8x_enter:
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
.Lsqr8x_prologue:
movl %r9d,%r10d
@ -722,6 +768,7 @@ bn_sqr8x_mont:
movq %r8,32(%rsp)
movq %rax,40(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lsqr8x_body:
.byte 102,72,15,110,209
@ -787,6 +834,7 @@ bn_sqr8x_mont:
pxor %xmm0,%xmm0
pshufd $0,%xmm1,%xmm1
movq 40(%rsp),%rsi
.cfi_def_cfa %rsi,8
jmp .Lsqr8x_cond_copy
.align 32
@ -816,26 +864,42 @@ bn_sqr8x_mont:
movq $1,%rax
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lsqr8x_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size bn_sqr8x_mont,.-bn_sqr8x_mont
.type bn_mulx4x_mont,@function
.align 32
bn_mulx4x_mont:
.cfi_startproc
movq %rsp,%rax
.cfi_def_cfa_register %rax
.Lmulx4x_enter:
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
.Lmulx4x_prologue:
shll $3,%r9d
@ -881,6 +945,7 @@ bn_mulx4x_mont:
movq %r8,24(%rsp)
movq %rdi,32(%rsp)
movq %rax,40(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
movq %r9,48(%rsp)
jmp .Lmulx4x_body
@ -1125,6 +1190,7 @@ bn_mulx4x_mont:
pxor %xmm0,%xmm0
pshufd $0,%xmm1,%xmm1
movq 40(%rsp),%rsi
.cfi_def_cfa %rsi,8
jmp .Lmulx4x_cond_copy
.align 32
@ -1154,14 +1220,22 @@ bn_mulx4x_mont:
movq $1,%rax
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmulx4x_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size bn_mulx4x_mont,.-bn_mulx4x_mont
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 16

View File

@ -8,8 +8,10 @@
.type bn_mul_mont_gather5,@function
.align 64
bn_mul_mont_gather5:
.cfi_startproc
movl %r9d,%r9d
movq %rsp,%rax
.cfi_def_cfa_register %rax
testl $7,%r9d
jnz .Lmul_enter
movl OPENSSL_ia32cap_P+8(%rip),%r11d
@ -19,11 +21,17 @@ bn_mul_mont_gather5:
.Lmul_enter:
movd 8(%rsp),%xmm5
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
negq %r9
movq %rsp,%r11
@ -37,6 +45,8 @@ bn_mul_mont_gather5:
subq %r10,%r11
andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
@ -54,6 +64,7 @@ bn_mul_mont_gather5:
leaq .Linc(%rip),%r10
movq %rax,8(%rsp,%r9,8)
.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul_body:
leaq 128(%rdx),%r12
@ -411,33 +422,50 @@ bn_mul_mont_gather5:
jnz .Lcopy
movq 8(%rsp,%r9,8),%rsi
.cfi_def_cfa %rsi,8
movq $1,%rax
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmul_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size bn_mul_mont_gather5,.-bn_mul_mont_gather5
.type bn_mul4x_mont_gather5,@function
.align 32
bn_mul4x_mont_gather5:
.cfi_startproc
.byte 0x67
movq %rsp,%rax
.cfi_def_cfa_register %rax
.Lmul4x_enter:
andl $0x80108,%r11d
cmpl $0x80108,%r11d
je .Lmulx4x_enter
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
.Lmul4x_prologue:
.byte 0x67
@ -493,22 +521,32 @@ bn_mul4x_mont_gather5:
negq %r9
movq %rax,40(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lmul4x_body:
call mul4x_internal
movq 40(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq $1,%rax
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmul4x_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
.type mul4x_internal,@function
@ -1040,17 +1078,25 @@ mul4x_internal:
.type bn_power5,@function
.align 32
bn_power5:
.cfi_startproc
movq %rsp,%rax
.cfi_def_cfa_register %rax
movl OPENSSL_ia32cap_P+8(%rip),%r11d
andl $0x80108,%r11d
cmpl $0x80108,%r11d
je .Lpowerx5_enter
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
.Lpower5_prologue:
shll $3,%r9d
@ -1115,6 +1161,7 @@ bn_power5:
movq %r8,32(%rsp)
movq %rax,40(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lpower5_body:
.byte 102,72,15,110,207
.byte 102,72,15,110,209
@ -1141,16 +1188,25 @@ bn_power5:
call mul4x_internal
movq 40(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq $1,%rax
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lpower5_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size bn_power5,.-bn_power5
.globl bn_sqr8x_internal
@ -2001,14 +2057,22 @@ bn_from_montgomery:
.type bn_from_mont8x,@function
.align 32
bn_from_mont8x:
.cfi_startproc
.byte 0x67
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
.Lfrom_prologue:
shll $3,%r9d
@ -2073,6 +2137,7 @@ bn_from_mont8x:
movq %r8,32(%rsp)
movq %rax,40(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lfrom_body:
movq %r9,%r11
leaq 48(%rsp),%rax
@ -2114,7 +2179,6 @@ bn_from_mont8x:
pxor %xmm0,%xmm0
leaq 48(%rsp),%rax
movq 40(%rsp),%rsi
jmp .Lfrom_mont_zero
.align 32
@ -2124,11 +2188,12 @@ bn_from_mont8x:
pxor %xmm0,%xmm0
leaq 48(%rsp),%rax
movq 40(%rsp),%rsi
jmp .Lfrom_mont_zero
.align 32
.Lfrom_mont_zero:
movq 40(%rsp),%rsi
.cfi_def_cfa %rsi,8
movdqa %xmm0,0(%rax)
movdqa %xmm0,16(%rax)
movdqa %xmm0,32(%rax)
@ -2139,26 +2204,42 @@ bn_from_mont8x:
movq $1,%rax
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lfrom_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size bn_from_mont8x,.-bn_from_mont8x
.type bn_mulx4x_mont_gather5,@function
.align 32
bn_mulx4x_mont_gather5:
.cfi_startproc
movq %rsp,%rax
.cfi_def_cfa_register %rax
.Lmulx4x_enter:
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
.Lmulx4x_prologue:
shll $3,%r9d
@ -2224,21 +2305,31 @@ bn_mulx4x_mont_gather5:
movq %r8,32(%rsp)
movq %rax,40(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lmulx4x_body:
call mulx4x_internal
movq 40(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq $1,%rax
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmulx4x_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5
.type mulx4x_internal,@function
@ -2666,14 +2757,22 @@ mulx4x_internal:
.type bn_powerx5,@function
.align 32
bn_powerx5:
.cfi_startproc
movq %rsp,%rax
.cfi_def_cfa_register %rax
.Lpowerx5_enter:
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
.Lpowerx5_prologue:
shll $3,%r9d
@ -2745,6 +2844,7 @@ bn_powerx5:
.byte 102,72,15,110,226
movq %r8,32(%rsp)
movq %rax,40(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lpowerx5_body:
call __bn_sqrx8x_internal
@ -2767,17 +2867,26 @@ bn_powerx5:
call mulx4x_internal
movq 40(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq $1,%rax
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lpowerx5_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size bn_powerx5,.-bn_powerx5
.globl bn_sqrx8x_internal

View File

@ -38,10 +38,12 @@ OPENSSL_rdtsc:
.type OPENSSL_ia32_cpuid,@function
.align 16
OPENSSL_ia32_cpuid:
.cfi_startproc
movq %rbx,%r8
.cfi_register %rbx,%r8
xorl %eax,%eax
movl %eax,8(%rdi)
movq %rax,8(%rdi)
cpuid
movl %eax,%r11d
@ -112,6 +114,7 @@ OPENSSL_ia32_cpuid:
.Lnocacheinfo:
movl $1,%eax
cpuid
movd %eax,%xmm0
andl $0xbfefffff,%edx
cmpl $0,%r9d
jne .Lnotintel
@ -159,26 +162,45 @@ OPENSSL_ia32_cpuid:
jc .Lnotknights
andl $0xfff7ffff,%ebx
.Lnotknights:
movd %xmm0,%eax
andl $0x0fff0ff0,%eax
cmpl $0x00050650,%eax
jne .Lnotskylakex
andl $0xfffeffff,%ebx
.Lnotskylakex:
movl %ebx,8(%rdi)
movl %ecx,12(%rdi)
.Lno_extended_info:
btl $27,%r9d
jnc .Lclear_avx
xorl %ecx,%ecx
.byte 0x0f,0x01,0xd0
andl $0xe6,%eax
cmpl $0xe6,%eax
je .Ldone
andl $0x3fdeffff,8(%rdi)
andl $6,%eax
cmpl $6,%eax
je .Ldone
.Lclear_avx:
movl $0xefffe7ff,%eax
andl %eax,%r9d
andl $0xffffffdf,8(%rdi)
movl $0x3fdeffdf,%eax
andl %eax,8(%rdi)
.Ldone:
shlq $32,%r9
movl %r10d,%eax
movq %r8,%rbx
.cfi_restore %rbx
orq %r9,%rax
.byte 0xf3,0xc3
.cfi_endproc
.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
.globl OPENSSL_cleanse
@ -215,6 +237,40 @@ OPENSSL_cleanse:
jne .Little
.byte 0xf3,0xc3
.size OPENSSL_cleanse,.-OPENSSL_cleanse
/*
 * CRYPTO_memcmp
 *
 * Compares two byte buffers for equality.
 *   %rdi = first buffer, %rsi = second buffer, %rdx = length in bytes
 *   Out: %rax = 0 when all bytes match (or length is 0), 1 otherwise.
 *   Clobbers: %rdx, %rdi, %rsi, %r10, %r11, flags.
 * The byte loop has no data-dependent exit: it always runs for the full
 * length and only accumulates the XOR differences, so its iteration
 * count does not depend on where the buffers differ.
 */
.globl CRYPTO_memcmp
.type CRYPTO_memcmp,@function
.align 16
CRYPTO_memcmp:
xorq %rax,%rax
xorq %r10,%r10
cmpq $0,%rdx
je .Lno_data
/* Fast path for exactly 16 bytes: two qword XORs OR-ed together. */
cmpq $16,%rdx
jne .Loop_cmp
movq (%rdi),%r10
movq 8(%rdi),%r11
movq $1,%rdx
xorq (%rsi),%r10
xorq 8(%rsi),%r11
orq %r11,%r10
/* Any differing bit leaves ZF clear -> return 1. */
cmovnzq %rdx,%rax
.byte 0xf3,0xc3
.align 16
.Loop_cmp:
movb (%rdi),%r10b
leaq 1(%rdi),%rdi
xorb (%rsi),%r10b
leaq 1(%rsi),%rsi
/* %al accumulates the OR of all byte differences. */
orb %r10b,%al
decq %rdx
jnz .Loop_cmp
/* Normalise: a non-zero accumulator negates to a value with bit 63 set,
   which the shift turns into 1; zero stays 0. */
negq %rax
shrq $63,%rax
.Lno_data:
.byte 0xf3,0xc3
.size CRYPTO_memcmp,.-CRYPTO_memcmp
.globl OPENSSL_wipe_cpu
.type OPENSSL_wipe_cpu,@function
.align 16
@ -246,32 +302,164 @@ OPENSSL_wipe_cpu:
leaq 8(%rsp),%rax
.byte 0xf3,0xc3
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
.globl OPENSSL_ia32_rdrand
.type OPENSSL_ia32_rdrand,@function
.globl OPENSSL_instrument_bus
.type OPENSSL_instrument_bus,@function
.align 16
OPENSSL_ia32_rdrand:
movl $8,%ecx
.Loop_rdrand:
.byte 72,15,199,240
jc .Lbreak_rdrand
loop .Loop_rdrand
.Lbreak_rdrand:
cmpq $0,%rax
cmoveq %rcx,%rax
.byte 0xf3,0xc3
.size OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand
OPENSSL_instrument_bus:
movq %rdi,%r10
movq %rsi,%rcx
movq %rsi,%r11
.globl OPENSSL_ia32_rdseed
.type OPENSSL_ia32_rdseed,@function
rdtsc
movl %eax,%r8d
movl $0,%r9d
clflush (%r10)
.byte 0xf0
addl %r9d,(%r10)
jmp .Loop
.align 16
OPENSSL_ia32_rdseed:
movl $8,%ecx
.Loop_rdseed:
.byte 72,15,199,248
jc .Lbreak_rdseed
loop .Loop_rdseed
.Lbreak_rdseed:
cmpq $0,%rax
cmoveq %rcx,%rax
.Loop: rdtsc
movl %eax,%edx
subl %r8d,%eax
movl %edx,%r8d
movl %eax,%r9d
clflush (%r10)
.byte 0xf0
addl %eax,(%r10)
leaq 4(%r10),%r10
subq $1,%rcx
jnz .Loop
movq %r11,%rax
.byte 0xf3,0xc3
.size OPENSSL_ia32_rdseed,.-OPENSSL_ia32_rdseed
.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
/*
 * OPENSSL_instrument_bus2
 *
 * Samples time-stamp-counter deltas around cache-line flushes and
 * accumulates them into an array of 32-bit counters.
 *   %rdi = output array of 32-bit words
 *   %rsi = cnt, number of output slots to fill
 *   %rdx = max, upper bound on the number of probe iterations
 *   Out: %rax = cnt minus the slots still unfilled when the loop stopped.
 *   Clobbers: %rcx, %rdx, %r8-%r11, flags.
 *
 * Register roles: %r10 = current slot, %rcx = slots remaining,
 * %r11 = iterations remaining, %r8d = previous TSC low word,
 * %r9d = previous delta.
 *
 * ABI fix: the original spilled cnt to 8(%rsp) and reloaded it at the
 * end. In the System V AMD64 ABI that address is above the return
 * address and belongs to the caller's frame, so the store could
 * overwrite a live caller local. %rsi is never modified in this
 * routine, so cnt is now read back from %rsi and no memory outside the
 * output array is written. This file is generated from x86_64cpuid.pl,
 * so the same change is needed in the generator.
 */
.globl OPENSSL_instrument_bus2
.type OPENSSL_instrument_bus2,@function
.align 16
OPENSSL_instrument_bus2:
movq %rdi,%r10
movq %rsi,%rcx
movq %rdx,%r11
rdtsc
movl %eax,%r8d
movl $0,%r9d
clflush (%r10)
/* 0xf0 is the LOCK prefix for the following addl. */
.byte 0xf0
addl %r9d,(%r10)
rdtsc
movl %eax,%edx
subl %r8d,%eax
movl %edx,%r8d
movl %eax,%r9d
.Loop2:
clflush (%r10)
.byte 0xf0
addl %eax,(%r10)
subq $1,%r11
jz .Ldone2
rdtsc
movl %eax,%edx
subl %r8d,%eax
movl %edx,%r8d
cmpl %r9d,%eax
movl %eax,%r9d
/* %rdx = 1 when this delta differs from the previous one, else 0. */
movl $0,%edx
setne %dl
/* Advance to the next slot only when the delta changed. */
subq %rdx,%rcx
/* lea leaves flags alone, so jnz still tests the subq result (%rcx != 0). */
leaq (%r10,%rdx,4),%r10
jnz .Loop2
.Ldone2:
/* %rsi still holds the original cnt. */
movq %rsi,%rax
subq %rcx,%rax
.byte 0xf3,0xc3
.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
/*
 * OPENSSL_ia32_rdrand_bytes
 *
 * Fills a buffer with bytes obtained from the RDRAND instruction.
 *   %rdi = output buffer, %rsi = number of bytes requested
 *   Out: %rax = number of bytes actually written (0 when %rsi is 0, and
 *        possibly fewer than requested if the instruction keeps failing).
 *   Clobbers: %rdi, %rsi, %r10, %r11, flags.
 * Each 64-bit draw is retried up to 8 times; if all 8 attempts report
 * failure (CF clear) the routine stops and returns the count so far.
 */
.globl OPENSSL_ia32_rdrand_bytes
.type OPENSSL_ia32_rdrand_bytes,@function
.align 16
OPENSSL_ia32_rdrand_bytes:
xorq %rax,%rax
cmpq $0,%rsi
je .Ldone_rdrand_bytes
/* %r11 = retry budget for the next draw. */
movq $8,%r11
.Loop_rdrand_bytes:
/* Hand-encoded "rdrand %r10" (REX.WB 0F C7 /6); CF=1 signals success. */
.byte 73,15,199,242
jc .Lbreak_rdrand_bytes
decq %r11
jnz .Loop_rdrand_bytes
jmp .Ldone_rdrand_bytes
.align 16
.Lbreak_rdrand_bytes:
/* Fewer than 8 bytes left: emit the remainder one byte at a time. */
cmpq $8,%rsi
jb .Ltail_rdrand_bytes
movq %r10,(%rdi)
leaq 8(%rdi),%rdi
addq $8,%rax
subq $8,%rsi
jz .Ldone_rdrand_bytes
/* More to go: reset the retry budget and draw again. */
movq $8,%r11
jmp .Loop_rdrand_bytes
.align 16
.Ltail_rdrand_bytes:
movb %r10b,(%rdi)
leaq 1(%rdi),%rdi
incq %rax
shrq $8,%r10
decq %rsi
jnz .Ltail_rdrand_bytes
.Ldone_rdrand_bytes:
/* Do not leave random material behind in %r10. */
xorq %r10,%r10
.byte 0xf3,0xc3
.size OPENSSL_ia32_rdrand_bytes,.-OPENSSL_ia32_rdrand_bytes
/*
 * OPENSSL_ia32_rdseed_bytes
 *
 * Fills a buffer with bytes obtained from the RDSEED instruction.
 *   %rdi = output buffer, %rsi = number of bytes requested
 *   Out: %rax = number of bytes actually written (0 when %rsi is 0, and
 *        possibly fewer than requested if the instruction keeps failing).
 *   Clobbers: %rdi, %rsi, %r10, %r11, flags.
 * Each 64-bit draw is retried up to 8 times; if all 8 attempts report
 * failure (CF clear) the routine stops and returns the count so far.
 */
.globl OPENSSL_ia32_rdseed_bytes
.type OPENSSL_ia32_rdseed_bytes,@function
.align 16
OPENSSL_ia32_rdseed_bytes:
xorq %rax,%rax
cmpq $0,%rsi
je .Ldone_rdseed_bytes
/* %r11 = retry budget for the next draw. */
movq $8,%r11
.Loop_rdseed_bytes:
/* Hand-encoded "rdseed %r10" (REX.WB 0F C7 /7); CF=1 signals success. */
.byte 73,15,199,250
jc .Lbreak_rdseed_bytes
decq %r11
jnz .Loop_rdseed_bytes
jmp .Ldone_rdseed_bytes
.align 16
.Lbreak_rdseed_bytes:
/* Fewer than 8 bytes left: emit the remainder one byte at a time. */
cmpq $8,%rsi
jb .Ltail_rdseed_bytes
movq %r10,(%rdi)
leaq 8(%rdi),%rdi
addq $8,%rax
subq $8,%rsi
jz .Ldone_rdseed_bytes
/* More to go: reset the retry budget and draw again. */
movq $8,%r11
jmp .Loop_rdseed_bytes
.align 16
.Ltail_rdseed_bytes:
movb %r10b,(%rdi)
leaq 1(%rdi),%rdi
incq %rax
shrq $8,%r10
decq %rsi
jnz .Ltail_rdseed_bytes
.Ldone_rdseed_bytes:
/* Do not leave random material behind in %r10. */
xorq %r10,%r10
.byte 0xf3,0xc3
.size OPENSSL_ia32_rdseed_bytes,.-OPENSSL_ia32_rdseed_bytes