/* $FreeBSD$ */

/* Do not modify. This file is auto-generated from aesni-sha256-x86_64.pl. */

.text

.globl aesni_cbc_sha256_enc
|
|
.type aesni_cbc_sha256_enc,@function
|
|
.align 16
|
|
/*
 * aesni_cbc_sha256_enc(inp, out, blocks, key, ivec, ctx, in0)
 *
 * Dispatcher for the stitched AES-CBC + SHA-256 encrypt routine.
 * SysV AMD64 ABI; from the implementations below: %rdi = inp,
 * %rsi = out, %rdx = number of 64-byte blocks, %rcx = AES key
 * schedule, %r8 = IV, %r9 = SHA-256 state, 8(%rsp) = 7th arg (in0).
 *
 * Probe convention: when called with inp == NULL, does no work and
 * returns 1 in %eax (nonzero = stitched code available, per OpenSSL
 * convention).  Otherwise tail-jumps to the best implementation for
 * this CPU as recorded in OPENSSL_ia32cap_P.
 */
aesni_cbc_sha256_enc:
|
|
/* %r11 -> OPENSSL_ia32cap_P capability vector (bit layout per
 * OPENSSL_ia32cap(3) -- confirm against that man page) */
leaq OPENSSL_ia32cap_P(%rip),%r11
|
|
movl $1,%eax
|
|
cmpq $0,%rdi
|
|
/* NULL input => capability probe: return 1 immediately */
je .Lprobe
|
|
movl 0(%r11),%eax
|
|
/* %r10 = capability words at offsets 4..11 (upper half holds the
 * CPUID leaf-7 EBX flags per OpenSSL's mapping) */
movq 4(%r11),%r10
|
|
/* bit 61: SHA extensions available (OpenSSL's bit numbering) */
btq $61,%r10
|
|
jc aesni_cbc_sha256_enc_shaext
|
|
movq %r10,%r11
|
|
/* isolate the leaf-7 EBX flag word in %r11d */
shrq $32,%r11
|
|
|
|
/* 2048 = 1<<11: XOP (AMD) as recorded by OpenSSL */
testl $2048,%r10d
|
|
jnz aesni_cbc_sha256_enc_xop
|
|
/* 296 = 1<<3 | 1<<5 | 1<<8: BMI1 + AVX2 + BMI2 -- AVX2 path needs
 * all three, hence mask-and-compare rather than a single test */
andl $296,%r11d
|
|
cmpl $296,%r11d
|
|
je aesni_cbc_sha256_enc_avx2
|
|
/* 268435456 = 1<<28: AVX */
andl $268435456,%r10d
|
|
jnz aesni_cbc_sha256_enc_avx
|
|
/* no usable ISA extension: trap.  This build provides no plain-SSE
 * fallback for the stitched routine. */
ud2
|
|
/* NOTE(review): everything from here to .Lprobe is unreachable (it
 * follows an unconditional ud2); it appears to be a remnant of the
 * generator's no-AVX variant, retained verbatim. */
xorl %eax,%eax
|
|
cmpq $0,%rdi
|
|
je .Lprobe
|
|
ud2
|
|
.Lprobe:
|
|
/* 0xf3 0xc3 = "rep ret" (branch-predictor-friendly return) */
.byte 0xf3,0xc3
|
|
.size aesni_cbc_sha256_enc,.-aesni_cbc_sha256_enc
|
|
|
|
.align 64
|
|
.type K256,@object
|
|
/*
 * SHA-256 round-constant table (the 64 K[t] values of FIPS 180-4,
 * sec. 4.2.2).  Each 16-byte group of four constants is stored twice;
 * the stitched loops address this table with a 32-byte stride (see the
 * vpaddd 0/32/64/96(%rbp) sequences below), so the duplication lets the
 * 128-bit and 256-bit code paths share one layout -- NOTE(review):
 * confirm stride rationale against the perlasm generator.
 *
 * Trailing data, addressed relative to K256 by the code in this file:
 *   K256+512: byte-swap shuffle mask for vpshufb (big-endian message
 *             load), two copies
 *   K256+544: 0/-1 dword mask table, indexed by (AES rounds - 9) * 8
 *             into %xmm14/%xmm13/%xmm12; used with vpand/vpor to blend
 *             the vaesenclast outputs for 128/192/256-bit key schedules
 *             -- NOTE(review): inferred from the vpand/vpor usage,
 *             verify against the generator
 *   then a NUL-terminated identification string
 */
K256:
|
|
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
|
|
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
|
|
.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
|
|
.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
|
|
.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
|
|
.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
|
|
.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
|
|
.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
|
|
.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
|
|
.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
|
|
.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
|
|
.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
|
|
.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
|
|
.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
|
|
.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
|
|
.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
|
|
.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
|
|
.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
|
|
.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
|
|
.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
|
|
.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
|
|
.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
|
|
.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
|
|
.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
|
|
.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
|
|
.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
|
|
.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
|
|
.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
|
|
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
|
|
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
|
|
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
|
|
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
|
|
|
|
/* K256+512: vpshufb byte-swap mask (x2) */
.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
|
|
.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
|
|
/* K256+544: blend-mask table, indexed by (AES rounds - 9) * 8 */
.long 0,0,0,0, 0,0,0,0, -1,-1,-1,-1
|
|
.long 0,0,0,0, 0,0,0,0
|
|
/* "AESNI-CBC+SHA256 stitch for x86_64, CRYPTOGAMS by <appro@openssl.org>\0" */
.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,50,53,54,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
|
.align 64
|
|
.type aesni_cbc_sha256_enc_xop,@function
|
|
.align 64
|
|
aesni_cbc_sha256_enc_xop:
|
|
.cfi_startproc
|
|
.Lxop_shortcut:
|
|
movq 8(%rsp),%r10
|
|
movq %rsp,%rax
|
|
.cfi_def_cfa_register %rax
|
|
pushq %rbx
|
|
.cfi_offset %rbx,-16
|
|
pushq %rbp
|
|
.cfi_offset %rbp,-24
|
|
pushq %r12
|
|
.cfi_offset %r12,-32
|
|
pushq %r13
|
|
.cfi_offset %r13,-40
|
|
pushq %r14
|
|
.cfi_offset %r14,-48
|
|
pushq %r15
|
|
.cfi_offset %r15,-56
|
|
subq $128,%rsp
|
|
andq $-64,%rsp
|
|
|
|
shlq $6,%rdx
|
|
subq %rdi,%rsi
|
|
subq %rdi,%r10
|
|
addq %rdi,%rdx
|
|
|
|
|
|
movq %rsi,64+8(%rsp)
|
|
movq %rdx,64+16(%rsp)
|
|
|
|
movq %r8,64+32(%rsp)
|
|
movq %r9,64+40(%rsp)
|
|
movq %r10,64+48(%rsp)
|
|
movq %rax,120(%rsp)
|
|
.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08
|
|
.Lprologue_xop:
|
|
vzeroall
|
|
|
|
movq %rdi,%r12
|
|
leaq 128(%rcx),%rdi
|
|
leaq K256+544(%rip),%r13
|
|
movl 240-128(%rdi),%r14d
|
|
movq %r9,%r15
|
|
movq %r10,%rsi
|
|
vmovdqu (%r8),%xmm8
|
|
subq $9,%r14
|
|
|
|
movl 0(%r15),%eax
|
|
movl 4(%r15),%ebx
|
|
movl 8(%r15),%ecx
|
|
movl 12(%r15),%edx
|
|
movl 16(%r15),%r8d
|
|
movl 20(%r15),%r9d
|
|
movl 24(%r15),%r10d
|
|
movl 28(%r15),%r11d
|
|
|
|
vmovdqa 0(%r13,%r14,8),%xmm14
|
|
vmovdqa 16(%r13,%r14,8),%xmm13
|
|
vmovdqa 32(%r13,%r14,8),%xmm12
|
|
vmovdqu 0-128(%rdi),%xmm10
|
|
jmp .Lloop_xop
|
|
.align 16
|
|
.Lloop_xop:
|
|
vmovdqa K256+512(%rip),%xmm7
|
|
vmovdqu 0(%rsi,%r12,1),%xmm0
|
|
vmovdqu 16(%rsi,%r12,1),%xmm1
|
|
vmovdqu 32(%rsi,%r12,1),%xmm2
|
|
vmovdqu 48(%rsi,%r12,1),%xmm3
|
|
vpshufb %xmm7,%xmm0,%xmm0
|
|
leaq K256(%rip),%rbp
|
|
vpshufb %xmm7,%xmm1,%xmm1
|
|
vpshufb %xmm7,%xmm2,%xmm2
|
|
vpaddd 0(%rbp),%xmm0,%xmm4
|
|
vpshufb %xmm7,%xmm3,%xmm3
|
|
vpaddd 32(%rbp),%xmm1,%xmm5
|
|
vpaddd 64(%rbp),%xmm2,%xmm6
|
|
vpaddd 96(%rbp),%xmm3,%xmm7
|
|
vmovdqa %xmm4,0(%rsp)
|
|
movl %eax,%r14d
|
|
vmovdqa %xmm5,16(%rsp)
|
|
movl %ebx,%esi
|
|
vmovdqa %xmm6,32(%rsp)
|
|
xorl %ecx,%esi
|
|
vmovdqa %xmm7,48(%rsp)
|
|
movl %r8d,%r13d
|
|
jmp .Lxop_00_47
|
|
|
|
.align 16
|
|
.Lxop_00_47:
|
|
subq $-32*4,%rbp
|
|
vmovdqu (%r12),%xmm9
|
|
movq %r12,64+0(%rsp)
|
|
vpalignr $4,%xmm0,%xmm1,%xmm4
|
|
rorl $14,%r13d
|
|
movl %r14d,%eax
|
|
vpalignr $4,%xmm2,%xmm3,%xmm7
|
|
movl %r9d,%r12d
|
|
xorl %r8d,%r13d
|
|
.byte 143,232,120,194,236,14
|
|
rorl $9,%r14d
|
|
xorl %r10d,%r12d
|
|
vpsrld $3,%xmm4,%xmm4
|
|
rorl $5,%r13d
|
|
xorl %eax,%r14d
|
|
vpaddd %xmm7,%xmm0,%xmm0
|
|
andl %r8d,%r12d
|
|
vpxor %xmm10,%xmm9,%xmm9
|
|
vmovdqu 16-128(%rdi),%xmm10
|
|
xorl %r8d,%r13d
|
|
addl 0(%rsp),%r11d
|
|
movl %eax,%r15d
|
|
.byte 143,232,120,194,245,11
|
|
rorl $11,%r14d
|
|
xorl %r10d,%r12d
|
|
vpxor %xmm5,%xmm4,%xmm4
|
|
xorl %ebx,%r15d
|
|
rorl $6,%r13d
|
|
addl %r12d,%r11d
|
|
andl %r15d,%esi
|
|
.byte 143,232,120,194,251,13
|
|
xorl %eax,%r14d
|
|
addl %r13d,%r11d
|
|
vpxor %xmm6,%xmm4,%xmm4
|
|
xorl %ebx,%esi
|
|
addl %r11d,%edx
|
|
vpsrld $10,%xmm3,%xmm6
|
|
rorl $2,%r14d
|
|
addl %esi,%r11d
|
|
vpaddd %xmm4,%xmm0,%xmm0
|
|
movl %edx,%r13d
|
|
addl %r11d,%r14d
|
|
.byte 143,232,120,194,239,2
|
|
rorl $14,%r13d
|
|
movl %r14d,%r11d
|
|
vpxor %xmm6,%xmm7,%xmm7
|
|
movl %r8d,%r12d
|
|
xorl %edx,%r13d
|
|
rorl $9,%r14d
|
|
xorl %r9d,%r12d
|
|
vpxor %xmm5,%xmm7,%xmm7
|
|
rorl $5,%r13d
|
|
xorl %r11d,%r14d
|
|
andl %edx,%r12d
|
|
vpxor %xmm8,%xmm9,%xmm9
|
|
xorl %edx,%r13d
|
|
vpsrldq $8,%xmm7,%xmm7
|
|
addl 4(%rsp),%r10d
|
|
movl %r11d,%esi
|
|
rorl $11,%r14d
|
|
xorl %r9d,%r12d
|
|
vpaddd %xmm7,%xmm0,%xmm0
|
|
xorl %eax,%esi
|
|
rorl $6,%r13d
|
|
addl %r12d,%r10d
|
|
andl %esi,%r15d
|
|
.byte 143,232,120,194,248,13
|
|
xorl %r11d,%r14d
|
|
addl %r13d,%r10d
|
|
vpsrld $10,%xmm0,%xmm6
|
|
xorl %eax,%r15d
|
|
addl %r10d,%ecx
|
|
.byte 143,232,120,194,239,2
|
|
rorl $2,%r14d
|
|
addl %r15d,%r10d
|
|
vpxor %xmm6,%xmm7,%xmm7
|
|
movl %ecx,%r13d
|
|
addl %r10d,%r14d
|
|
rorl $14,%r13d
|
|
movl %r14d,%r10d
|
|
vpxor %xmm5,%xmm7,%xmm7
|
|
movl %edx,%r12d
|
|
xorl %ecx,%r13d
|
|
rorl $9,%r14d
|
|
xorl %r8d,%r12d
|
|
vpslldq $8,%xmm7,%xmm7
|
|
rorl $5,%r13d
|
|
xorl %r10d,%r14d
|
|
andl %ecx,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 32-128(%rdi),%xmm10
|
|
xorl %ecx,%r13d
|
|
vpaddd %xmm7,%xmm0,%xmm0
|
|
addl 8(%rsp),%r9d
|
|
movl %r10d,%r15d
|
|
rorl $11,%r14d
|
|
xorl %r8d,%r12d
|
|
vpaddd 0(%rbp),%xmm0,%xmm6
|
|
xorl %r11d,%r15d
|
|
rorl $6,%r13d
|
|
addl %r12d,%r9d
|
|
andl %r15d,%esi
|
|
xorl %r10d,%r14d
|
|
addl %r13d,%r9d
|
|
xorl %r11d,%esi
|
|
addl %r9d,%ebx
|
|
rorl $2,%r14d
|
|
addl %esi,%r9d
|
|
movl %ebx,%r13d
|
|
addl %r9d,%r14d
|
|
rorl $14,%r13d
|
|
movl %r14d,%r9d
|
|
movl %ecx,%r12d
|
|
xorl %ebx,%r13d
|
|
rorl $9,%r14d
|
|
xorl %edx,%r12d
|
|
rorl $5,%r13d
|
|
xorl %r9d,%r14d
|
|
andl %ebx,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 48-128(%rdi),%xmm10
|
|
xorl %ebx,%r13d
|
|
addl 12(%rsp),%r8d
|
|
movl %r9d,%esi
|
|
rorl $11,%r14d
|
|
xorl %edx,%r12d
|
|
xorl %r10d,%esi
|
|
rorl $6,%r13d
|
|
addl %r12d,%r8d
|
|
andl %esi,%r15d
|
|
xorl %r9d,%r14d
|
|
addl %r13d,%r8d
|
|
xorl %r10d,%r15d
|
|
addl %r8d,%eax
|
|
rorl $2,%r14d
|
|
addl %r15d,%r8d
|
|
movl %eax,%r13d
|
|
addl %r8d,%r14d
|
|
vmovdqa %xmm6,0(%rsp)
|
|
vpalignr $4,%xmm1,%xmm2,%xmm4
|
|
rorl $14,%r13d
|
|
movl %r14d,%r8d
|
|
vpalignr $4,%xmm3,%xmm0,%xmm7
|
|
movl %ebx,%r12d
|
|
xorl %eax,%r13d
|
|
.byte 143,232,120,194,236,14
|
|
rorl $9,%r14d
|
|
xorl %ecx,%r12d
|
|
vpsrld $3,%xmm4,%xmm4
|
|
rorl $5,%r13d
|
|
xorl %r8d,%r14d
|
|
vpaddd %xmm7,%xmm1,%xmm1
|
|
andl %eax,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 64-128(%rdi),%xmm10
|
|
xorl %eax,%r13d
|
|
addl 16(%rsp),%edx
|
|
movl %r8d,%r15d
|
|
.byte 143,232,120,194,245,11
|
|
rorl $11,%r14d
|
|
xorl %ecx,%r12d
|
|
vpxor %xmm5,%xmm4,%xmm4
|
|
xorl %r9d,%r15d
|
|
rorl $6,%r13d
|
|
addl %r12d,%edx
|
|
andl %r15d,%esi
|
|
.byte 143,232,120,194,248,13
|
|
xorl %r8d,%r14d
|
|
addl %r13d,%edx
|
|
vpxor %xmm6,%xmm4,%xmm4
|
|
xorl %r9d,%esi
|
|
addl %edx,%r11d
|
|
vpsrld $10,%xmm0,%xmm6
|
|
rorl $2,%r14d
|
|
addl %esi,%edx
|
|
vpaddd %xmm4,%xmm1,%xmm1
|
|
movl %r11d,%r13d
|
|
addl %edx,%r14d
|
|
.byte 143,232,120,194,239,2
|
|
rorl $14,%r13d
|
|
movl %r14d,%edx
|
|
vpxor %xmm6,%xmm7,%xmm7
|
|
movl %eax,%r12d
|
|
xorl %r11d,%r13d
|
|
rorl $9,%r14d
|
|
xorl %ebx,%r12d
|
|
vpxor %xmm5,%xmm7,%xmm7
|
|
rorl $5,%r13d
|
|
xorl %edx,%r14d
|
|
andl %r11d,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 80-128(%rdi),%xmm10
|
|
xorl %r11d,%r13d
|
|
vpsrldq $8,%xmm7,%xmm7
|
|
addl 20(%rsp),%ecx
|
|
movl %edx,%esi
|
|
rorl $11,%r14d
|
|
xorl %ebx,%r12d
|
|
vpaddd %xmm7,%xmm1,%xmm1
|
|
xorl %r8d,%esi
|
|
rorl $6,%r13d
|
|
addl %r12d,%ecx
|
|
andl %esi,%r15d
|
|
.byte 143,232,120,194,249,13
|
|
xorl %edx,%r14d
|
|
addl %r13d,%ecx
|
|
vpsrld $10,%xmm1,%xmm6
|
|
xorl %r8d,%r15d
|
|
addl %ecx,%r10d
|
|
.byte 143,232,120,194,239,2
|
|
rorl $2,%r14d
|
|
addl %r15d,%ecx
|
|
vpxor %xmm6,%xmm7,%xmm7
|
|
movl %r10d,%r13d
|
|
addl %ecx,%r14d
|
|
rorl $14,%r13d
|
|
movl %r14d,%ecx
|
|
vpxor %xmm5,%xmm7,%xmm7
|
|
movl %r11d,%r12d
|
|
xorl %r10d,%r13d
|
|
rorl $9,%r14d
|
|
xorl %eax,%r12d
|
|
vpslldq $8,%xmm7,%xmm7
|
|
rorl $5,%r13d
|
|
xorl %ecx,%r14d
|
|
andl %r10d,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 96-128(%rdi),%xmm10
|
|
xorl %r10d,%r13d
|
|
vpaddd %xmm7,%xmm1,%xmm1
|
|
addl 24(%rsp),%ebx
|
|
movl %ecx,%r15d
|
|
rorl $11,%r14d
|
|
xorl %eax,%r12d
|
|
vpaddd 32(%rbp),%xmm1,%xmm6
|
|
xorl %edx,%r15d
|
|
rorl $6,%r13d
|
|
addl %r12d,%ebx
|
|
andl %r15d,%esi
|
|
xorl %ecx,%r14d
|
|
addl %r13d,%ebx
|
|
xorl %edx,%esi
|
|
addl %ebx,%r9d
|
|
rorl $2,%r14d
|
|
addl %esi,%ebx
|
|
movl %r9d,%r13d
|
|
addl %ebx,%r14d
|
|
rorl $14,%r13d
|
|
movl %r14d,%ebx
|
|
movl %r10d,%r12d
|
|
xorl %r9d,%r13d
|
|
rorl $9,%r14d
|
|
xorl %r11d,%r12d
|
|
rorl $5,%r13d
|
|
xorl %ebx,%r14d
|
|
andl %r9d,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 112-128(%rdi),%xmm10
|
|
xorl %r9d,%r13d
|
|
addl 28(%rsp),%eax
|
|
movl %ebx,%esi
|
|
rorl $11,%r14d
|
|
xorl %r11d,%r12d
|
|
xorl %ecx,%esi
|
|
rorl $6,%r13d
|
|
addl %r12d,%eax
|
|
andl %esi,%r15d
|
|
xorl %ebx,%r14d
|
|
addl %r13d,%eax
|
|
xorl %ecx,%r15d
|
|
addl %eax,%r8d
|
|
rorl $2,%r14d
|
|
addl %r15d,%eax
|
|
movl %r8d,%r13d
|
|
addl %eax,%r14d
|
|
vmovdqa %xmm6,16(%rsp)
|
|
vpalignr $4,%xmm2,%xmm3,%xmm4
|
|
rorl $14,%r13d
|
|
movl %r14d,%eax
|
|
vpalignr $4,%xmm0,%xmm1,%xmm7
|
|
movl %r9d,%r12d
|
|
xorl %r8d,%r13d
|
|
.byte 143,232,120,194,236,14
|
|
rorl $9,%r14d
|
|
xorl %r10d,%r12d
|
|
vpsrld $3,%xmm4,%xmm4
|
|
rorl $5,%r13d
|
|
xorl %eax,%r14d
|
|
vpaddd %xmm7,%xmm2,%xmm2
|
|
andl %r8d,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 128-128(%rdi),%xmm10
|
|
xorl %r8d,%r13d
|
|
addl 32(%rsp),%r11d
|
|
movl %eax,%r15d
|
|
.byte 143,232,120,194,245,11
|
|
rorl $11,%r14d
|
|
xorl %r10d,%r12d
|
|
vpxor %xmm5,%xmm4,%xmm4
|
|
xorl %ebx,%r15d
|
|
rorl $6,%r13d
|
|
addl %r12d,%r11d
|
|
andl %r15d,%esi
|
|
.byte 143,232,120,194,249,13
|
|
xorl %eax,%r14d
|
|
addl %r13d,%r11d
|
|
vpxor %xmm6,%xmm4,%xmm4
|
|
xorl %ebx,%esi
|
|
addl %r11d,%edx
|
|
vpsrld $10,%xmm1,%xmm6
|
|
rorl $2,%r14d
|
|
addl %esi,%r11d
|
|
vpaddd %xmm4,%xmm2,%xmm2
|
|
movl %edx,%r13d
|
|
addl %r11d,%r14d
|
|
.byte 143,232,120,194,239,2
|
|
rorl $14,%r13d
|
|
movl %r14d,%r11d
|
|
vpxor %xmm6,%xmm7,%xmm7
|
|
movl %r8d,%r12d
|
|
xorl %edx,%r13d
|
|
rorl $9,%r14d
|
|
xorl %r9d,%r12d
|
|
vpxor %xmm5,%xmm7,%xmm7
|
|
rorl $5,%r13d
|
|
xorl %r11d,%r14d
|
|
andl %edx,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 144-128(%rdi),%xmm10
|
|
xorl %edx,%r13d
|
|
vpsrldq $8,%xmm7,%xmm7
|
|
addl 36(%rsp),%r10d
|
|
movl %r11d,%esi
|
|
rorl $11,%r14d
|
|
xorl %r9d,%r12d
|
|
vpaddd %xmm7,%xmm2,%xmm2
|
|
xorl %eax,%esi
|
|
rorl $6,%r13d
|
|
addl %r12d,%r10d
|
|
andl %esi,%r15d
|
|
.byte 143,232,120,194,250,13
|
|
xorl %r11d,%r14d
|
|
addl %r13d,%r10d
|
|
vpsrld $10,%xmm2,%xmm6
|
|
xorl %eax,%r15d
|
|
addl %r10d,%ecx
|
|
.byte 143,232,120,194,239,2
|
|
rorl $2,%r14d
|
|
addl %r15d,%r10d
|
|
vpxor %xmm6,%xmm7,%xmm7
|
|
movl %ecx,%r13d
|
|
addl %r10d,%r14d
|
|
rorl $14,%r13d
|
|
movl %r14d,%r10d
|
|
vpxor %xmm5,%xmm7,%xmm7
|
|
movl %edx,%r12d
|
|
xorl %ecx,%r13d
|
|
rorl $9,%r14d
|
|
xorl %r8d,%r12d
|
|
vpslldq $8,%xmm7,%xmm7
|
|
rorl $5,%r13d
|
|
xorl %r10d,%r14d
|
|
andl %ecx,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 160-128(%rdi),%xmm10
|
|
xorl %ecx,%r13d
|
|
vpaddd %xmm7,%xmm2,%xmm2
|
|
addl 40(%rsp),%r9d
|
|
movl %r10d,%r15d
|
|
rorl $11,%r14d
|
|
xorl %r8d,%r12d
|
|
vpaddd 64(%rbp),%xmm2,%xmm6
|
|
xorl %r11d,%r15d
|
|
rorl $6,%r13d
|
|
addl %r12d,%r9d
|
|
andl %r15d,%esi
|
|
xorl %r10d,%r14d
|
|
addl %r13d,%r9d
|
|
xorl %r11d,%esi
|
|
addl %r9d,%ebx
|
|
rorl $2,%r14d
|
|
addl %esi,%r9d
|
|
movl %ebx,%r13d
|
|
addl %r9d,%r14d
|
|
rorl $14,%r13d
|
|
movl %r14d,%r9d
|
|
movl %ecx,%r12d
|
|
xorl %ebx,%r13d
|
|
rorl $9,%r14d
|
|
xorl %edx,%r12d
|
|
rorl $5,%r13d
|
|
xorl %r9d,%r14d
|
|
andl %ebx,%r12d
|
|
vaesenclast %xmm10,%xmm9,%xmm11
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 176-128(%rdi),%xmm10
|
|
xorl %ebx,%r13d
|
|
addl 44(%rsp),%r8d
|
|
movl %r9d,%esi
|
|
rorl $11,%r14d
|
|
xorl %edx,%r12d
|
|
xorl %r10d,%esi
|
|
rorl $6,%r13d
|
|
addl %r12d,%r8d
|
|
andl %esi,%r15d
|
|
xorl %r9d,%r14d
|
|
addl %r13d,%r8d
|
|
xorl %r10d,%r15d
|
|
addl %r8d,%eax
|
|
rorl $2,%r14d
|
|
addl %r15d,%r8d
|
|
movl %eax,%r13d
|
|
addl %r8d,%r14d
|
|
vmovdqa %xmm6,32(%rsp)
|
|
vpalignr $4,%xmm3,%xmm0,%xmm4
|
|
rorl $14,%r13d
|
|
movl %r14d,%r8d
|
|
vpalignr $4,%xmm1,%xmm2,%xmm7
|
|
movl %ebx,%r12d
|
|
xorl %eax,%r13d
|
|
.byte 143,232,120,194,236,14
|
|
rorl $9,%r14d
|
|
xorl %ecx,%r12d
|
|
vpsrld $3,%xmm4,%xmm4
|
|
rorl $5,%r13d
|
|
xorl %r8d,%r14d
|
|
vpaddd %xmm7,%xmm3,%xmm3
|
|
andl %eax,%r12d
|
|
vpand %xmm12,%xmm11,%xmm8
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 192-128(%rdi),%xmm10
|
|
xorl %eax,%r13d
|
|
addl 48(%rsp),%edx
|
|
movl %r8d,%r15d
|
|
.byte 143,232,120,194,245,11
|
|
rorl $11,%r14d
|
|
xorl %ecx,%r12d
|
|
vpxor %xmm5,%xmm4,%xmm4
|
|
xorl %r9d,%r15d
|
|
rorl $6,%r13d
|
|
addl %r12d,%edx
|
|
andl %r15d,%esi
|
|
.byte 143,232,120,194,250,13
|
|
xorl %r8d,%r14d
|
|
addl %r13d,%edx
|
|
vpxor %xmm6,%xmm4,%xmm4
|
|
xorl %r9d,%esi
|
|
addl %edx,%r11d
|
|
vpsrld $10,%xmm2,%xmm6
|
|
rorl $2,%r14d
|
|
addl %esi,%edx
|
|
vpaddd %xmm4,%xmm3,%xmm3
|
|
movl %r11d,%r13d
|
|
addl %edx,%r14d
|
|
.byte 143,232,120,194,239,2
|
|
rorl $14,%r13d
|
|
movl %r14d,%edx
|
|
vpxor %xmm6,%xmm7,%xmm7
|
|
movl %eax,%r12d
|
|
xorl %r11d,%r13d
|
|
rorl $9,%r14d
|
|
xorl %ebx,%r12d
|
|
vpxor %xmm5,%xmm7,%xmm7
|
|
rorl $5,%r13d
|
|
xorl %edx,%r14d
|
|
andl %r11d,%r12d
|
|
vaesenclast %xmm10,%xmm9,%xmm11
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 208-128(%rdi),%xmm10
|
|
xorl %r11d,%r13d
|
|
vpsrldq $8,%xmm7,%xmm7
|
|
addl 52(%rsp),%ecx
|
|
movl %edx,%esi
|
|
rorl $11,%r14d
|
|
xorl %ebx,%r12d
|
|
vpaddd %xmm7,%xmm3,%xmm3
|
|
xorl %r8d,%esi
|
|
rorl $6,%r13d
|
|
addl %r12d,%ecx
|
|
andl %esi,%r15d
|
|
.byte 143,232,120,194,251,13
|
|
xorl %edx,%r14d
|
|
addl %r13d,%ecx
|
|
vpsrld $10,%xmm3,%xmm6
|
|
xorl %r8d,%r15d
|
|
addl %ecx,%r10d
|
|
.byte 143,232,120,194,239,2
|
|
rorl $2,%r14d
|
|
addl %r15d,%ecx
|
|
vpxor %xmm6,%xmm7,%xmm7
|
|
movl %r10d,%r13d
|
|
addl %ecx,%r14d
|
|
rorl $14,%r13d
|
|
movl %r14d,%ecx
|
|
vpxor %xmm5,%xmm7,%xmm7
|
|
movl %r11d,%r12d
|
|
xorl %r10d,%r13d
|
|
rorl $9,%r14d
|
|
xorl %eax,%r12d
|
|
vpslldq $8,%xmm7,%xmm7
|
|
rorl $5,%r13d
|
|
xorl %ecx,%r14d
|
|
andl %r10d,%r12d
|
|
vpand %xmm13,%xmm11,%xmm11
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 224-128(%rdi),%xmm10
|
|
xorl %r10d,%r13d
|
|
vpaddd %xmm7,%xmm3,%xmm3
|
|
addl 56(%rsp),%ebx
|
|
movl %ecx,%r15d
|
|
rorl $11,%r14d
|
|
xorl %eax,%r12d
|
|
vpaddd 96(%rbp),%xmm3,%xmm6
|
|
xorl %edx,%r15d
|
|
rorl $6,%r13d
|
|
addl %r12d,%ebx
|
|
andl %r15d,%esi
|
|
xorl %ecx,%r14d
|
|
addl %r13d,%ebx
|
|
xorl %edx,%esi
|
|
addl %ebx,%r9d
|
|
rorl $2,%r14d
|
|
addl %esi,%ebx
|
|
movl %r9d,%r13d
|
|
addl %ebx,%r14d
|
|
rorl $14,%r13d
|
|
movl %r14d,%ebx
|
|
movl %r10d,%r12d
|
|
xorl %r9d,%r13d
|
|
rorl $9,%r14d
|
|
xorl %r11d,%r12d
|
|
rorl $5,%r13d
|
|
xorl %ebx,%r14d
|
|
andl %r9d,%r12d
|
|
vpor %xmm11,%xmm8,%xmm8
|
|
vaesenclast %xmm10,%xmm9,%xmm11
|
|
vmovdqu 0-128(%rdi),%xmm10
|
|
xorl %r9d,%r13d
|
|
addl 60(%rsp),%eax
|
|
movl %ebx,%esi
|
|
rorl $11,%r14d
|
|
xorl %r11d,%r12d
|
|
xorl %ecx,%esi
|
|
rorl $6,%r13d
|
|
addl %r12d,%eax
|
|
andl %esi,%r15d
|
|
xorl %ebx,%r14d
|
|
addl %r13d,%eax
|
|
xorl %ecx,%r15d
|
|
addl %eax,%r8d
|
|
rorl $2,%r14d
|
|
addl %r15d,%eax
|
|
movl %r8d,%r13d
|
|
addl %eax,%r14d
|
|
vmovdqa %xmm6,48(%rsp)
|
|
movq 64+0(%rsp),%r12
|
|
vpand %xmm14,%xmm11,%xmm11
|
|
movq 64+8(%rsp),%r15
|
|
vpor %xmm11,%xmm8,%xmm8
|
|
vmovdqu %xmm8,(%r15,%r12,1)
|
|
leaq 16(%r12),%r12
|
|
cmpb $0,131(%rbp)
|
|
jne .Lxop_00_47
|
|
vmovdqu (%r12),%xmm9
|
|
movq %r12,64+0(%rsp)
|
|
rorl $14,%r13d
|
|
movl %r14d,%eax
|
|
movl %r9d,%r12d
|
|
xorl %r8d,%r13d
|
|
rorl $9,%r14d
|
|
xorl %r10d,%r12d
|
|
rorl $5,%r13d
|
|
xorl %eax,%r14d
|
|
andl %r8d,%r12d
|
|
vpxor %xmm10,%xmm9,%xmm9
|
|
vmovdqu 16-128(%rdi),%xmm10
|
|
xorl %r8d,%r13d
|
|
addl 0(%rsp),%r11d
|
|
movl %eax,%r15d
|
|
rorl $11,%r14d
|
|
xorl %r10d,%r12d
|
|
xorl %ebx,%r15d
|
|
rorl $6,%r13d
|
|
addl %r12d,%r11d
|
|
andl %r15d,%esi
|
|
xorl %eax,%r14d
|
|
addl %r13d,%r11d
|
|
xorl %ebx,%esi
|
|
addl %r11d,%edx
|
|
rorl $2,%r14d
|
|
addl %esi,%r11d
|
|
movl %edx,%r13d
|
|
addl %r11d,%r14d
|
|
rorl $14,%r13d
|
|
movl %r14d,%r11d
|
|
movl %r8d,%r12d
|
|
xorl %edx,%r13d
|
|
rorl $9,%r14d
|
|
xorl %r9d,%r12d
|
|
rorl $5,%r13d
|
|
xorl %r11d,%r14d
|
|
andl %edx,%r12d
|
|
vpxor %xmm8,%xmm9,%xmm9
|
|
xorl %edx,%r13d
|
|
addl 4(%rsp),%r10d
|
|
movl %r11d,%esi
|
|
rorl $11,%r14d
|
|
xorl %r9d,%r12d
|
|
xorl %eax,%esi
|
|
rorl $6,%r13d
|
|
addl %r12d,%r10d
|
|
andl %esi,%r15d
|
|
xorl %r11d,%r14d
|
|
addl %r13d,%r10d
|
|
xorl %eax,%r15d
|
|
addl %r10d,%ecx
|
|
rorl $2,%r14d
|
|
addl %r15d,%r10d
|
|
movl %ecx,%r13d
|
|
addl %r10d,%r14d
|
|
rorl $14,%r13d
|
|
movl %r14d,%r10d
|
|
movl %edx,%r12d
|
|
xorl %ecx,%r13d
|
|
rorl $9,%r14d
|
|
xorl %r8d,%r12d
|
|
rorl $5,%r13d
|
|
xorl %r10d,%r14d
|
|
andl %ecx,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 32-128(%rdi),%xmm10
|
|
xorl %ecx,%r13d
|
|
addl 8(%rsp),%r9d
|
|
movl %r10d,%r15d
|
|
rorl $11,%r14d
|
|
xorl %r8d,%r12d
|
|
xorl %r11d,%r15d
|
|
rorl $6,%r13d
|
|
addl %r12d,%r9d
|
|
andl %r15d,%esi
|
|
xorl %r10d,%r14d
|
|
addl %r13d,%r9d
|
|
xorl %r11d,%esi
|
|
addl %r9d,%ebx
|
|
rorl $2,%r14d
|
|
addl %esi,%r9d
|
|
movl %ebx,%r13d
|
|
addl %r9d,%r14d
|
|
rorl $14,%r13d
|
|
movl %r14d,%r9d
|
|
movl %ecx,%r12d
|
|
xorl %ebx,%r13d
|
|
rorl $9,%r14d
|
|
xorl %edx,%r12d
|
|
rorl $5,%r13d
|
|
xorl %r9d,%r14d
|
|
andl %ebx,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 48-128(%rdi),%xmm10
|
|
xorl %ebx,%r13d
|
|
addl 12(%rsp),%r8d
|
|
movl %r9d,%esi
|
|
rorl $11,%r14d
|
|
xorl %edx,%r12d
|
|
xorl %r10d,%esi
|
|
rorl $6,%r13d
|
|
addl %r12d,%r8d
|
|
andl %esi,%r15d
|
|
xorl %r9d,%r14d
|
|
addl %r13d,%r8d
|
|
xorl %r10d,%r15d
|
|
addl %r8d,%eax
|
|
rorl $2,%r14d
|
|
addl %r15d,%r8d
|
|
movl %eax,%r13d
|
|
addl %r8d,%r14d
|
|
rorl $14,%r13d
|
|
movl %r14d,%r8d
|
|
movl %ebx,%r12d
|
|
xorl %eax,%r13d
|
|
rorl $9,%r14d
|
|
xorl %ecx,%r12d
|
|
rorl $5,%r13d
|
|
xorl %r8d,%r14d
|
|
andl %eax,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 64-128(%rdi),%xmm10
|
|
xorl %eax,%r13d
|
|
addl 16(%rsp),%edx
|
|
movl %r8d,%r15d
|
|
rorl $11,%r14d
|
|
xorl %ecx,%r12d
|
|
xorl %r9d,%r15d
|
|
rorl $6,%r13d
|
|
addl %r12d,%edx
|
|
andl %r15d,%esi
|
|
xorl %r8d,%r14d
|
|
addl %r13d,%edx
|
|
xorl %r9d,%esi
|
|
addl %edx,%r11d
|
|
rorl $2,%r14d
|
|
addl %esi,%edx
|
|
movl %r11d,%r13d
|
|
addl %edx,%r14d
|
|
rorl $14,%r13d
|
|
movl %r14d,%edx
|
|
movl %eax,%r12d
|
|
xorl %r11d,%r13d
|
|
rorl $9,%r14d
|
|
xorl %ebx,%r12d
|
|
rorl $5,%r13d
|
|
xorl %edx,%r14d
|
|
andl %r11d,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 80-128(%rdi),%xmm10
|
|
xorl %r11d,%r13d
|
|
addl 20(%rsp),%ecx
|
|
movl %edx,%esi
|
|
rorl $11,%r14d
|
|
xorl %ebx,%r12d
|
|
xorl %r8d,%esi
|
|
rorl $6,%r13d
|
|
addl %r12d,%ecx
|
|
andl %esi,%r15d
|
|
xorl %edx,%r14d
|
|
addl %r13d,%ecx
|
|
xorl %r8d,%r15d
|
|
addl %ecx,%r10d
|
|
rorl $2,%r14d
|
|
addl %r15d,%ecx
|
|
movl %r10d,%r13d
|
|
addl %ecx,%r14d
|
|
rorl $14,%r13d
|
|
movl %r14d,%ecx
|
|
movl %r11d,%r12d
|
|
xorl %r10d,%r13d
|
|
rorl $9,%r14d
|
|
xorl %eax,%r12d
|
|
rorl $5,%r13d
|
|
xorl %ecx,%r14d
|
|
andl %r10d,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 96-128(%rdi),%xmm10
|
|
xorl %r10d,%r13d
|
|
addl 24(%rsp),%ebx
|
|
movl %ecx,%r15d
|
|
rorl $11,%r14d
|
|
xorl %eax,%r12d
|
|
xorl %edx,%r15d
|
|
rorl $6,%r13d
|
|
addl %r12d,%ebx
|
|
andl %r15d,%esi
|
|
xorl %ecx,%r14d
|
|
addl %r13d,%ebx
|
|
xorl %edx,%esi
|
|
addl %ebx,%r9d
|
|
rorl $2,%r14d
|
|
addl %esi,%ebx
|
|
movl %r9d,%r13d
|
|
addl %ebx,%r14d
|
|
rorl $14,%r13d
|
|
movl %r14d,%ebx
|
|
movl %r10d,%r12d
|
|
xorl %r9d,%r13d
|
|
rorl $9,%r14d
|
|
xorl %r11d,%r12d
|
|
rorl $5,%r13d
|
|
xorl %ebx,%r14d
|
|
andl %r9d,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 112-128(%rdi),%xmm10
|
|
xorl %r9d,%r13d
|
|
addl 28(%rsp),%eax
|
|
movl %ebx,%esi
|
|
rorl $11,%r14d
|
|
xorl %r11d,%r12d
|
|
xorl %ecx,%esi
|
|
rorl $6,%r13d
|
|
addl %r12d,%eax
|
|
andl %esi,%r15d
|
|
xorl %ebx,%r14d
|
|
addl %r13d,%eax
|
|
xorl %ecx,%r15d
|
|
addl %eax,%r8d
|
|
rorl $2,%r14d
|
|
addl %r15d,%eax
|
|
movl %r8d,%r13d
|
|
addl %eax,%r14d
|
|
rorl $14,%r13d
|
|
movl %r14d,%eax
|
|
movl %r9d,%r12d
|
|
xorl %r8d,%r13d
|
|
rorl $9,%r14d
|
|
xorl %r10d,%r12d
|
|
rorl $5,%r13d
|
|
xorl %eax,%r14d
|
|
andl %r8d,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 128-128(%rdi),%xmm10
|
|
xorl %r8d,%r13d
|
|
addl 32(%rsp),%r11d
|
|
movl %eax,%r15d
|
|
rorl $11,%r14d
|
|
xorl %r10d,%r12d
|
|
xorl %ebx,%r15d
|
|
rorl $6,%r13d
|
|
addl %r12d,%r11d
|
|
andl %r15d,%esi
|
|
xorl %eax,%r14d
|
|
addl %r13d,%r11d
|
|
xorl %ebx,%esi
|
|
addl %r11d,%edx
|
|
rorl $2,%r14d
|
|
addl %esi,%r11d
|
|
movl %edx,%r13d
|
|
addl %r11d,%r14d
|
|
rorl $14,%r13d
|
|
movl %r14d,%r11d
|
|
movl %r8d,%r12d
|
|
xorl %edx,%r13d
|
|
rorl $9,%r14d
|
|
xorl %r9d,%r12d
|
|
rorl $5,%r13d
|
|
xorl %r11d,%r14d
|
|
andl %edx,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 144-128(%rdi),%xmm10
|
|
xorl %edx,%r13d
|
|
addl 36(%rsp),%r10d
|
|
movl %r11d,%esi
|
|
rorl $11,%r14d
|
|
xorl %r9d,%r12d
|
|
xorl %eax,%esi
|
|
rorl $6,%r13d
|
|
addl %r12d,%r10d
|
|
andl %esi,%r15d
|
|
xorl %r11d,%r14d
|
|
addl %r13d,%r10d
|
|
xorl %eax,%r15d
|
|
addl %r10d,%ecx
|
|
rorl $2,%r14d
|
|
addl %r15d,%r10d
|
|
movl %ecx,%r13d
|
|
addl %r10d,%r14d
|
|
rorl $14,%r13d
|
|
movl %r14d,%r10d
|
|
movl %edx,%r12d
|
|
xorl %ecx,%r13d
|
|
rorl $9,%r14d
|
|
xorl %r8d,%r12d
|
|
rorl $5,%r13d
|
|
xorl %r10d,%r14d
|
|
andl %ecx,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 160-128(%rdi),%xmm10
|
|
xorl %ecx,%r13d
|
|
addl 40(%rsp),%r9d
|
|
movl %r10d,%r15d
|
|
rorl $11,%r14d
|
|
xorl %r8d,%r12d
|
|
xorl %r11d,%r15d
|
|
rorl $6,%r13d
|
|
addl %r12d,%r9d
|
|
andl %r15d,%esi
|
|
xorl %r10d,%r14d
|
|
addl %r13d,%r9d
|
|
xorl %r11d,%esi
|
|
addl %r9d,%ebx
|
|
rorl $2,%r14d
|
|
addl %esi,%r9d
|
|
movl %ebx,%r13d
|
|
addl %r9d,%r14d
|
|
rorl $14,%r13d
|
|
movl %r14d,%r9d
|
|
movl %ecx,%r12d
|
|
xorl %ebx,%r13d
|
|
rorl $9,%r14d
|
|
xorl %edx,%r12d
|
|
rorl $5,%r13d
|
|
xorl %r9d,%r14d
|
|
andl %ebx,%r12d
|
|
vaesenclast %xmm10,%xmm9,%xmm11
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 176-128(%rdi),%xmm10
|
|
xorl %ebx,%r13d
|
|
addl 44(%rsp),%r8d
|
|
movl %r9d,%esi
|
|
rorl $11,%r14d
|
|
xorl %edx,%r12d
|
|
xorl %r10d,%esi
|
|
rorl $6,%r13d
|
|
addl %r12d,%r8d
|
|
andl %esi,%r15d
|
|
xorl %r9d,%r14d
|
|
addl %r13d,%r8d
|
|
xorl %r10d,%r15d
|
|
addl %r8d,%eax
|
|
rorl $2,%r14d
|
|
addl %r15d,%r8d
|
|
movl %eax,%r13d
|
|
addl %r8d,%r14d
|
|
rorl $14,%r13d
|
|
movl %r14d,%r8d
|
|
movl %ebx,%r12d
|
|
xorl %eax,%r13d
|
|
rorl $9,%r14d
|
|
xorl %ecx,%r12d
|
|
rorl $5,%r13d
|
|
xorl %r8d,%r14d
|
|
andl %eax,%r12d
|
|
vpand %xmm12,%xmm11,%xmm8
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 192-128(%rdi),%xmm10
|
|
xorl %eax,%r13d
|
|
addl 48(%rsp),%edx
|
|
movl %r8d,%r15d
|
|
rorl $11,%r14d
|
|
xorl %ecx,%r12d
|
|
xorl %r9d,%r15d
|
|
rorl $6,%r13d
|
|
addl %r12d,%edx
|
|
andl %r15d,%esi
|
|
xorl %r8d,%r14d
|
|
addl %r13d,%edx
|
|
xorl %r9d,%esi
|
|
addl %edx,%r11d
|
|
rorl $2,%r14d
|
|
addl %esi,%edx
|
|
movl %r11d,%r13d
|
|
addl %edx,%r14d
|
|
rorl $14,%r13d
|
|
movl %r14d,%edx
|
|
movl %eax,%r12d
|
|
xorl %r11d,%r13d
|
|
rorl $9,%r14d
|
|
xorl %ebx,%r12d
|
|
rorl $5,%r13d
|
|
xorl %edx,%r14d
|
|
andl %r11d,%r12d
|
|
vaesenclast %xmm10,%xmm9,%xmm11
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 208-128(%rdi),%xmm10
|
|
xorl %r11d,%r13d
|
|
addl 52(%rsp),%ecx
|
|
movl %edx,%esi
|
|
rorl $11,%r14d
|
|
xorl %ebx,%r12d
|
|
xorl %r8d,%esi
|
|
rorl $6,%r13d
|
|
addl %r12d,%ecx
|
|
andl %esi,%r15d
|
|
xorl %edx,%r14d
|
|
addl %r13d,%ecx
|
|
xorl %r8d,%r15d
|
|
addl %ecx,%r10d
|
|
rorl $2,%r14d
|
|
addl %r15d,%ecx
|
|
movl %r10d,%r13d
|
|
addl %ecx,%r14d
|
|
rorl $14,%r13d
|
|
movl %r14d,%ecx
|
|
movl %r11d,%r12d
|
|
xorl %r10d,%r13d
|
|
rorl $9,%r14d
|
|
xorl %eax,%r12d
|
|
rorl $5,%r13d
|
|
xorl %ecx,%r14d
|
|
andl %r10d,%r12d
|
|
vpand %xmm13,%xmm11,%xmm11
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 224-128(%rdi),%xmm10
|
|
xorl %r10d,%r13d
|
|
addl 56(%rsp),%ebx
|
|
movl %ecx,%r15d
|
|
rorl $11,%r14d
|
|
xorl %eax,%r12d
|
|
xorl %edx,%r15d
|
|
rorl $6,%r13d
|
|
addl %r12d,%ebx
|
|
andl %r15d,%esi
|
|
xorl %ecx,%r14d
|
|
addl %r13d,%ebx
|
|
xorl %edx,%esi
|
|
addl %ebx,%r9d
|
|
rorl $2,%r14d
|
|
addl %esi,%ebx
|
|
movl %r9d,%r13d
|
|
addl %ebx,%r14d
|
|
rorl $14,%r13d
|
|
movl %r14d,%ebx
|
|
movl %r10d,%r12d
|
|
xorl %r9d,%r13d
|
|
rorl $9,%r14d
|
|
xorl %r11d,%r12d
|
|
rorl $5,%r13d
|
|
xorl %ebx,%r14d
|
|
andl %r9d,%r12d
|
|
vpor %xmm11,%xmm8,%xmm8
|
|
vaesenclast %xmm10,%xmm9,%xmm11
|
|
vmovdqu 0-128(%rdi),%xmm10
|
|
xorl %r9d,%r13d
|
|
addl 60(%rsp),%eax
|
|
movl %ebx,%esi
|
|
rorl $11,%r14d
|
|
xorl %r11d,%r12d
|
|
xorl %ecx,%esi
|
|
rorl $6,%r13d
|
|
addl %r12d,%eax
|
|
andl %esi,%r15d
|
|
xorl %ebx,%r14d
|
|
addl %r13d,%eax
|
|
xorl %ecx,%r15d
|
|
addl %eax,%r8d
|
|
rorl $2,%r14d
|
|
addl %r15d,%eax
|
|
movl %r8d,%r13d
|
|
addl %eax,%r14d
|
|
movq 64+0(%rsp),%r12
|
|
movq 64+8(%rsp),%r13
|
|
movq 64+40(%rsp),%r15
|
|
movq 64+48(%rsp),%rsi
|
|
|
|
vpand %xmm14,%xmm11,%xmm11
|
|
movl %r14d,%eax
|
|
vpor %xmm11,%xmm8,%xmm8
|
|
vmovdqu %xmm8,(%r12,%r13,1)
|
|
leaq 16(%r12),%r12
|
|
|
|
addl 0(%r15),%eax
|
|
addl 4(%r15),%ebx
|
|
addl 8(%r15),%ecx
|
|
addl 12(%r15),%edx
|
|
addl 16(%r15),%r8d
|
|
addl 20(%r15),%r9d
|
|
addl 24(%r15),%r10d
|
|
addl 28(%r15),%r11d
|
|
|
|
cmpq 64+16(%rsp),%r12
|
|
|
|
movl %eax,0(%r15)
|
|
movl %ebx,4(%r15)
|
|
movl %ecx,8(%r15)
|
|
movl %edx,12(%r15)
|
|
movl %r8d,16(%r15)
|
|
movl %r9d,20(%r15)
|
|
movl %r10d,24(%r15)
|
|
movl %r11d,28(%r15)
|
|
|
|
jb .Lloop_xop
|
|
|
|
movq 64+32(%rsp),%r8
|
|
movq 120(%rsp),%rsi
|
|
.cfi_def_cfa %rsi,8
|
|
vmovdqu %xmm8,(%r8)
|
|
vzeroall
|
|
movq -48(%rsi),%r15
|
|
.cfi_restore %r15
|
|
movq -40(%rsi),%r14
|
|
.cfi_restore %r14
|
|
movq -32(%rsi),%r13
|
|
.cfi_restore %r13
|
|
movq -24(%rsi),%r12
|
|
.cfi_restore %r12
|
|
movq -16(%rsi),%rbp
|
|
.cfi_restore %rbp
|
|
movq -8(%rsi),%rbx
|
|
.cfi_restore %rbx
|
|
leaq (%rsi),%rsp
|
|
.cfi_def_cfa_register %rsp
|
|
.Lepilogue_xop:
|
|
.byte 0xf3,0xc3
|
|
.cfi_endproc
|
|
.size aesni_cbc_sha256_enc_xop,.-aesni_cbc_sha256_enc_xop
|
|
.type aesni_cbc_sha256_enc_avx,@function
|
|
.align 64
|
|
aesni_cbc_sha256_enc_avx:
|
|
.cfi_startproc
|
|
.Lavx_shortcut:
|
|
movq 8(%rsp),%r10
|
|
movq %rsp,%rax
|
|
.cfi_def_cfa_register %rax
|
|
pushq %rbx
|
|
.cfi_offset %rbx,-16
|
|
pushq %rbp
|
|
.cfi_offset %rbp,-24
|
|
pushq %r12
|
|
.cfi_offset %r12,-32
|
|
pushq %r13
|
|
.cfi_offset %r13,-40
|
|
pushq %r14
|
|
.cfi_offset %r14,-48
|
|
pushq %r15
|
|
.cfi_offset %r15,-56
|
|
subq $128,%rsp
|
|
andq $-64,%rsp
|
|
|
|
shlq $6,%rdx
|
|
subq %rdi,%rsi
|
|
subq %rdi,%r10
|
|
addq %rdi,%rdx
|
|
|
|
|
|
movq %rsi,64+8(%rsp)
|
|
movq %rdx,64+16(%rsp)
|
|
|
|
movq %r8,64+32(%rsp)
|
|
movq %r9,64+40(%rsp)
|
|
movq %r10,64+48(%rsp)
|
|
movq %rax,120(%rsp)
|
|
.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08
|
|
.Lprologue_avx:
|
|
vzeroall
|
|
|
|
movq %rdi,%r12
|
|
leaq 128(%rcx),%rdi
|
|
leaq K256+544(%rip),%r13
|
|
movl 240-128(%rdi),%r14d
|
|
movq %r9,%r15
|
|
movq %r10,%rsi
|
|
vmovdqu (%r8),%xmm8
|
|
subq $9,%r14
|
|
|
|
movl 0(%r15),%eax
|
|
movl 4(%r15),%ebx
|
|
movl 8(%r15),%ecx
|
|
movl 12(%r15),%edx
|
|
movl 16(%r15),%r8d
|
|
movl 20(%r15),%r9d
|
|
movl 24(%r15),%r10d
|
|
movl 28(%r15),%r11d
|
|
|
|
vmovdqa 0(%r13,%r14,8),%xmm14
|
|
vmovdqa 16(%r13,%r14,8),%xmm13
|
|
vmovdqa 32(%r13,%r14,8),%xmm12
|
|
vmovdqu 0-128(%rdi),%xmm10
|
|
jmp .Lloop_avx
|
|
.align 16
|
|
.Lloop_avx:
|
|
vmovdqa K256+512(%rip),%xmm7
|
|
vmovdqu 0(%rsi,%r12,1),%xmm0
|
|
vmovdqu 16(%rsi,%r12,1),%xmm1
|
|
vmovdqu 32(%rsi,%r12,1),%xmm2
|
|
vmovdqu 48(%rsi,%r12,1),%xmm3
|
|
vpshufb %xmm7,%xmm0,%xmm0
|
|
leaq K256(%rip),%rbp
|
|
vpshufb %xmm7,%xmm1,%xmm1
|
|
vpshufb %xmm7,%xmm2,%xmm2
|
|
vpaddd 0(%rbp),%xmm0,%xmm4
|
|
vpshufb %xmm7,%xmm3,%xmm3
|
|
vpaddd 32(%rbp),%xmm1,%xmm5
|
|
vpaddd 64(%rbp),%xmm2,%xmm6
|
|
vpaddd 96(%rbp),%xmm3,%xmm7
|
|
vmovdqa %xmm4,0(%rsp)
|
|
movl %eax,%r14d
|
|
vmovdqa %xmm5,16(%rsp)
|
|
movl %ebx,%esi
|
|
vmovdqa %xmm6,32(%rsp)
|
|
xorl %ecx,%esi
|
|
vmovdqa %xmm7,48(%rsp)
|
|
movl %r8d,%r13d
|
|
jmp .Lavx_00_47
|
|
|
|
.align 16
|
|
.Lavx_00_47:
|
|
subq $-32*4,%rbp
|
|
vmovdqu (%r12),%xmm9
|
|
movq %r12,64+0(%rsp)
|
|
vpalignr $4,%xmm0,%xmm1,%xmm4
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%eax
|
|
movl %r9d,%r12d
|
|
vpalignr $4,%xmm2,%xmm3,%xmm7
|
|
xorl %r8d,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %r10d,%r12d
|
|
vpsrld $7,%xmm4,%xmm6
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %eax,%r14d
|
|
andl %r8d,%r12d
|
|
vpaddd %xmm7,%xmm0,%xmm0
|
|
vpxor %xmm10,%xmm9,%xmm9
|
|
vmovdqu 16-128(%rdi),%xmm10
|
|
xorl %r8d,%r13d
|
|
addl 0(%rsp),%r11d
|
|
movl %eax,%r15d
|
|
vpsrld $3,%xmm4,%xmm7
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %r10d,%r12d
|
|
xorl %ebx,%r15d
|
|
vpslld $14,%xmm4,%xmm5
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%r11d
|
|
andl %r15d,%esi
|
|
vpxor %xmm6,%xmm7,%xmm4
|
|
xorl %eax,%r14d
|
|
addl %r13d,%r11d
|
|
xorl %ebx,%esi
|
|
vpshufd $250,%xmm3,%xmm7
|
|
addl %r11d,%edx
|
|
shrdl $2,%r14d,%r14d
|
|
addl %esi,%r11d
|
|
vpsrld $11,%xmm6,%xmm6
|
|
movl %edx,%r13d
|
|
addl %r11d,%r14d
|
|
shrdl $14,%r13d,%r13d
|
|
vpxor %xmm5,%xmm4,%xmm4
|
|
movl %r14d,%r11d
|
|
movl %r8d,%r12d
|
|
xorl %edx,%r13d
|
|
vpslld $11,%xmm5,%xmm5
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %r9d,%r12d
|
|
shrdl $5,%r13d,%r13d
|
|
vpxor %xmm6,%xmm4,%xmm4
|
|
xorl %r11d,%r14d
|
|
andl %edx,%r12d
|
|
vpxor %xmm8,%xmm9,%xmm9
|
|
xorl %edx,%r13d
|
|
vpsrld $10,%xmm7,%xmm6
|
|
addl 4(%rsp),%r10d
|
|
movl %r11d,%esi
|
|
shrdl $11,%r14d,%r14d
|
|
vpxor %xmm5,%xmm4,%xmm4
|
|
xorl %r9d,%r12d
|
|
xorl %eax,%esi
|
|
shrdl $6,%r13d,%r13d
|
|
vpsrlq $17,%xmm7,%xmm7
|
|
addl %r12d,%r10d
|
|
andl %esi,%r15d
|
|
xorl %r11d,%r14d
|
|
vpaddd %xmm4,%xmm0,%xmm0
|
|
addl %r13d,%r10d
|
|
xorl %eax,%r15d
|
|
addl %r10d,%ecx
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
shrdl $2,%r14d,%r14d
|
|
addl %r15d,%r10d
|
|
movl %ecx,%r13d
|
|
vpsrlq $2,%xmm7,%xmm7
|
|
addl %r10d,%r14d
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%r10d
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
movl %edx,%r12d
|
|
xorl %ecx,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
vpshufd $132,%xmm6,%xmm6
|
|
xorl %r8d,%r12d
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %r10d,%r14d
|
|
vpsrldq $8,%xmm6,%xmm6
|
|
andl %ecx,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 32-128(%rdi),%xmm10
|
|
xorl %ecx,%r13d
|
|
addl 8(%rsp),%r9d
|
|
vpaddd %xmm6,%xmm0,%xmm0
|
|
movl %r10d,%r15d
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %r8d,%r12d
|
|
vpshufd $80,%xmm0,%xmm7
|
|
xorl %r11d,%r15d
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%r9d
|
|
vpsrld $10,%xmm7,%xmm6
|
|
andl %r15d,%esi
|
|
xorl %r10d,%r14d
|
|
addl %r13d,%r9d
|
|
vpsrlq $17,%xmm7,%xmm7
|
|
xorl %r11d,%esi
|
|
addl %r9d,%ebx
|
|
shrdl $2,%r14d,%r14d
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
addl %esi,%r9d
|
|
movl %ebx,%r13d
|
|
addl %r9d,%r14d
|
|
vpsrlq $2,%xmm7,%xmm7
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%r9d
|
|
movl %ecx,%r12d
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
xorl %ebx,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %edx,%r12d
|
|
vpshufd $232,%xmm6,%xmm6
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %r9d,%r14d
|
|
andl %ebx,%r12d
|
|
vpslldq $8,%xmm6,%xmm6
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 48-128(%rdi),%xmm10
|
|
xorl %ebx,%r13d
|
|
addl 12(%rsp),%r8d
|
|
movl %r9d,%esi
|
|
vpaddd %xmm6,%xmm0,%xmm0
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %edx,%r12d
|
|
xorl %r10d,%esi
|
|
vpaddd 0(%rbp),%xmm0,%xmm6
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%r8d
|
|
andl %esi,%r15d
|
|
xorl %r9d,%r14d
|
|
addl %r13d,%r8d
|
|
xorl %r10d,%r15d
|
|
addl %r8d,%eax
|
|
shrdl $2,%r14d,%r14d
|
|
addl %r15d,%r8d
|
|
movl %eax,%r13d
|
|
addl %r8d,%r14d
|
|
vmovdqa %xmm6,0(%rsp)
|
|
vpalignr $4,%xmm1,%xmm2,%xmm4
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%r8d
|
|
movl %ebx,%r12d
|
|
vpalignr $4,%xmm3,%xmm0,%xmm7
|
|
xorl %eax,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %ecx,%r12d
|
|
vpsrld $7,%xmm4,%xmm6
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %r8d,%r14d
|
|
andl %eax,%r12d
|
|
vpaddd %xmm7,%xmm1,%xmm1
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 64-128(%rdi),%xmm10
|
|
xorl %eax,%r13d
|
|
addl 16(%rsp),%edx
|
|
movl %r8d,%r15d
|
|
vpsrld $3,%xmm4,%xmm7
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %ecx,%r12d
|
|
xorl %r9d,%r15d
|
|
vpslld $14,%xmm4,%xmm5
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%edx
|
|
andl %r15d,%esi
|
|
vpxor %xmm6,%xmm7,%xmm4
|
|
xorl %r8d,%r14d
|
|
addl %r13d,%edx
|
|
xorl %r9d,%esi
|
|
vpshufd $250,%xmm0,%xmm7
|
|
addl %edx,%r11d
|
|
shrdl $2,%r14d,%r14d
|
|
addl %esi,%edx
|
|
vpsrld $11,%xmm6,%xmm6
|
|
movl %r11d,%r13d
|
|
addl %edx,%r14d
|
|
shrdl $14,%r13d,%r13d
|
|
vpxor %xmm5,%xmm4,%xmm4
|
|
movl %r14d,%edx
|
|
movl %eax,%r12d
|
|
xorl %r11d,%r13d
|
|
vpslld $11,%xmm5,%xmm5
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %ebx,%r12d
|
|
shrdl $5,%r13d,%r13d
|
|
vpxor %xmm6,%xmm4,%xmm4
|
|
xorl %edx,%r14d
|
|
andl %r11d,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 80-128(%rdi),%xmm10
|
|
xorl %r11d,%r13d
|
|
vpsrld $10,%xmm7,%xmm6
|
|
addl 20(%rsp),%ecx
|
|
movl %edx,%esi
|
|
shrdl $11,%r14d,%r14d
|
|
vpxor %xmm5,%xmm4,%xmm4
|
|
xorl %ebx,%r12d
|
|
xorl %r8d,%esi
|
|
shrdl $6,%r13d,%r13d
|
|
vpsrlq $17,%xmm7,%xmm7
|
|
addl %r12d,%ecx
|
|
andl %esi,%r15d
|
|
xorl %edx,%r14d
|
|
vpaddd %xmm4,%xmm1,%xmm1
|
|
addl %r13d,%ecx
|
|
xorl %r8d,%r15d
|
|
addl %ecx,%r10d
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
shrdl $2,%r14d,%r14d
|
|
addl %r15d,%ecx
|
|
movl %r10d,%r13d
|
|
vpsrlq $2,%xmm7,%xmm7
|
|
addl %ecx,%r14d
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%ecx
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
movl %r11d,%r12d
|
|
xorl %r10d,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
vpshufd $132,%xmm6,%xmm6
|
|
xorl %eax,%r12d
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %ecx,%r14d
|
|
vpsrldq $8,%xmm6,%xmm6
|
|
andl %r10d,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 96-128(%rdi),%xmm10
|
|
xorl %r10d,%r13d
|
|
addl 24(%rsp),%ebx
|
|
vpaddd %xmm6,%xmm1,%xmm1
|
|
movl %ecx,%r15d
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %eax,%r12d
|
|
vpshufd $80,%xmm1,%xmm7
|
|
xorl %edx,%r15d
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%ebx
|
|
vpsrld $10,%xmm7,%xmm6
|
|
andl %r15d,%esi
|
|
xorl %ecx,%r14d
|
|
addl %r13d,%ebx
|
|
vpsrlq $17,%xmm7,%xmm7
|
|
xorl %edx,%esi
|
|
addl %ebx,%r9d
|
|
shrdl $2,%r14d,%r14d
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
addl %esi,%ebx
|
|
movl %r9d,%r13d
|
|
addl %ebx,%r14d
|
|
vpsrlq $2,%xmm7,%xmm7
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%ebx
|
|
movl %r10d,%r12d
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
xorl %r9d,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %r11d,%r12d
|
|
vpshufd $232,%xmm6,%xmm6
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %ebx,%r14d
|
|
andl %r9d,%r12d
|
|
vpslldq $8,%xmm6,%xmm6
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 112-128(%rdi),%xmm10
|
|
xorl %r9d,%r13d
|
|
addl 28(%rsp),%eax
|
|
movl %ebx,%esi
|
|
vpaddd %xmm6,%xmm1,%xmm1
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %r11d,%r12d
|
|
xorl %ecx,%esi
|
|
vpaddd 32(%rbp),%xmm1,%xmm6
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%eax
|
|
andl %esi,%r15d
|
|
xorl %ebx,%r14d
|
|
addl %r13d,%eax
|
|
xorl %ecx,%r15d
|
|
addl %eax,%r8d
|
|
shrdl $2,%r14d,%r14d
|
|
addl %r15d,%eax
|
|
movl %r8d,%r13d
|
|
addl %eax,%r14d
|
|
vmovdqa %xmm6,16(%rsp)
|
|
vpalignr $4,%xmm2,%xmm3,%xmm4
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%eax
|
|
movl %r9d,%r12d
|
|
vpalignr $4,%xmm0,%xmm1,%xmm7
|
|
xorl %r8d,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %r10d,%r12d
|
|
vpsrld $7,%xmm4,%xmm6
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %eax,%r14d
|
|
andl %r8d,%r12d
|
|
vpaddd %xmm7,%xmm2,%xmm2
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 128-128(%rdi),%xmm10
|
|
xorl %r8d,%r13d
|
|
addl 32(%rsp),%r11d
|
|
movl %eax,%r15d
|
|
vpsrld $3,%xmm4,%xmm7
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %r10d,%r12d
|
|
xorl %ebx,%r15d
|
|
vpslld $14,%xmm4,%xmm5
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%r11d
|
|
andl %r15d,%esi
|
|
vpxor %xmm6,%xmm7,%xmm4
|
|
xorl %eax,%r14d
|
|
addl %r13d,%r11d
|
|
xorl %ebx,%esi
|
|
vpshufd $250,%xmm1,%xmm7
|
|
addl %r11d,%edx
|
|
shrdl $2,%r14d,%r14d
|
|
addl %esi,%r11d
|
|
vpsrld $11,%xmm6,%xmm6
|
|
movl %edx,%r13d
|
|
addl %r11d,%r14d
|
|
shrdl $14,%r13d,%r13d
|
|
vpxor %xmm5,%xmm4,%xmm4
|
|
movl %r14d,%r11d
|
|
movl %r8d,%r12d
|
|
xorl %edx,%r13d
|
|
vpslld $11,%xmm5,%xmm5
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %r9d,%r12d
|
|
shrdl $5,%r13d,%r13d
|
|
vpxor %xmm6,%xmm4,%xmm4
|
|
xorl %r11d,%r14d
|
|
andl %edx,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 144-128(%rdi),%xmm10
|
|
xorl %edx,%r13d
|
|
vpsrld $10,%xmm7,%xmm6
|
|
addl 36(%rsp),%r10d
|
|
movl %r11d,%esi
|
|
shrdl $11,%r14d,%r14d
|
|
vpxor %xmm5,%xmm4,%xmm4
|
|
xorl %r9d,%r12d
|
|
xorl %eax,%esi
|
|
shrdl $6,%r13d,%r13d
|
|
vpsrlq $17,%xmm7,%xmm7
|
|
addl %r12d,%r10d
|
|
andl %esi,%r15d
|
|
xorl %r11d,%r14d
|
|
vpaddd %xmm4,%xmm2,%xmm2
|
|
addl %r13d,%r10d
|
|
xorl %eax,%r15d
|
|
addl %r10d,%ecx
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
shrdl $2,%r14d,%r14d
|
|
addl %r15d,%r10d
|
|
movl %ecx,%r13d
|
|
vpsrlq $2,%xmm7,%xmm7
|
|
addl %r10d,%r14d
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%r10d
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
movl %edx,%r12d
|
|
xorl %ecx,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
vpshufd $132,%xmm6,%xmm6
|
|
xorl %r8d,%r12d
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %r10d,%r14d
|
|
vpsrldq $8,%xmm6,%xmm6
|
|
andl %ecx,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 160-128(%rdi),%xmm10
|
|
xorl %ecx,%r13d
|
|
addl 40(%rsp),%r9d
|
|
vpaddd %xmm6,%xmm2,%xmm2
|
|
movl %r10d,%r15d
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %r8d,%r12d
|
|
vpshufd $80,%xmm2,%xmm7
|
|
xorl %r11d,%r15d
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%r9d
|
|
vpsrld $10,%xmm7,%xmm6
|
|
andl %r15d,%esi
|
|
xorl %r10d,%r14d
|
|
addl %r13d,%r9d
|
|
vpsrlq $17,%xmm7,%xmm7
|
|
xorl %r11d,%esi
|
|
addl %r9d,%ebx
|
|
shrdl $2,%r14d,%r14d
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
addl %esi,%r9d
|
|
movl %ebx,%r13d
|
|
addl %r9d,%r14d
|
|
vpsrlq $2,%xmm7,%xmm7
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%r9d
|
|
movl %ecx,%r12d
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
xorl %ebx,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %edx,%r12d
|
|
vpshufd $232,%xmm6,%xmm6
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %r9d,%r14d
|
|
andl %ebx,%r12d
|
|
vpslldq $8,%xmm6,%xmm6
|
|
vaesenclast %xmm10,%xmm9,%xmm11
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 176-128(%rdi),%xmm10
|
|
xorl %ebx,%r13d
|
|
addl 44(%rsp),%r8d
|
|
movl %r9d,%esi
|
|
vpaddd %xmm6,%xmm2,%xmm2
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %edx,%r12d
|
|
xorl %r10d,%esi
|
|
vpaddd 64(%rbp),%xmm2,%xmm6
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%r8d
|
|
andl %esi,%r15d
|
|
xorl %r9d,%r14d
|
|
addl %r13d,%r8d
|
|
xorl %r10d,%r15d
|
|
addl %r8d,%eax
|
|
shrdl $2,%r14d,%r14d
|
|
addl %r15d,%r8d
|
|
movl %eax,%r13d
|
|
addl %r8d,%r14d
|
|
vmovdqa %xmm6,32(%rsp)
|
|
vpalignr $4,%xmm3,%xmm0,%xmm4
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%r8d
|
|
movl %ebx,%r12d
|
|
vpalignr $4,%xmm1,%xmm2,%xmm7
|
|
xorl %eax,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %ecx,%r12d
|
|
vpsrld $7,%xmm4,%xmm6
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %r8d,%r14d
|
|
andl %eax,%r12d
|
|
vpaddd %xmm7,%xmm3,%xmm3
|
|
vpand %xmm12,%xmm11,%xmm8
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 192-128(%rdi),%xmm10
|
|
xorl %eax,%r13d
|
|
addl 48(%rsp),%edx
|
|
movl %r8d,%r15d
|
|
vpsrld $3,%xmm4,%xmm7
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %ecx,%r12d
|
|
xorl %r9d,%r15d
|
|
vpslld $14,%xmm4,%xmm5
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%edx
|
|
andl %r15d,%esi
|
|
vpxor %xmm6,%xmm7,%xmm4
|
|
xorl %r8d,%r14d
|
|
addl %r13d,%edx
|
|
xorl %r9d,%esi
|
|
vpshufd $250,%xmm2,%xmm7
|
|
addl %edx,%r11d
|
|
shrdl $2,%r14d,%r14d
|
|
addl %esi,%edx
|
|
vpsrld $11,%xmm6,%xmm6
|
|
movl %r11d,%r13d
|
|
addl %edx,%r14d
|
|
shrdl $14,%r13d,%r13d
|
|
vpxor %xmm5,%xmm4,%xmm4
|
|
movl %r14d,%edx
|
|
movl %eax,%r12d
|
|
xorl %r11d,%r13d
|
|
vpslld $11,%xmm5,%xmm5
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %ebx,%r12d
|
|
shrdl $5,%r13d,%r13d
|
|
vpxor %xmm6,%xmm4,%xmm4
|
|
xorl %edx,%r14d
|
|
andl %r11d,%r12d
|
|
vaesenclast %xmm10,%xmm9,%xmm11
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 208-128(%rdi),%xmm10
|
|
xorl %r11d,%r13d
|
|
vpsrld $10,%xmm7,%xmm6
|
|
addl 52(%rsp),%ecx
|
|
movl %edx,%esi
|
|
shrdl $11,%r14d,%r14d
|
|
vpxor %xmm5,%xmm4,%xmm4
|
|
xorl %ebx,%r12d
|
|
xorl %r8d,%esi
|
|
shrdl $6,%r13d,%r13d
|
|
vpsrlq $17,%xmm7,%xmm7
|
|
addl %r12d,%ecx
|
|
andl %esi,%r15d
|
|
xorl %edx,%r14d
|
|
vpaddd %xmm4,%xmm3,%xmm3
|
|
addl %r13d,%ecx
|
|
xorl %r8d,%r15d
|
|
addl %ecx,%r10d
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
shrdl $2,%r14d,%r14d
|
|
addl %r15d,%ecx
|
|
movl %r10d,%r13d
|
|
vpsrlq $2,%xmm7,%xmm7
|
|
addl %ecx,%r14d
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%ecx
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
movl %r11d,%r12d
|
|
xorl %r10d,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
vpshufd $132,%xmm6,%xmm6
|
|
xorl %eax,%r12d
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %ecx,%r14d
|
|
vpsrldq $8,%xmm6,%xmm6
|
|
andl %r10d,%r12d
|
|
vpand %xmm13,%xmm11,%xmm11
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 224-128(%rdi),%xmm10
|
|
xorl %r10d,%r13d
|
|
addl 56(%rsp),%ebx
|
|
vpaddd %xmm6,%xmm3,%xmm3
|
|
movl %ecx,%r15d
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %eax,%r12d
|
|
vpshufd $80,%xmm3,%xmm7
|
|
xorl %edx,%r15d
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%ebx
|
|
vpsrld $10,%xmm7,%xmm6
|
|
andl %r15d,%esi
|
|
xorl %ecx,%r14d
|
|
addl %r13d,%ebx
|
|
vpsrlq $17,%xmm7,%xmm7
|
|
xorl %edx,%esi
|
|
addl %ebx,%r9d
|
|
shrdl $2,%r14d,%r14d
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
addl %esi,%ebx
|
|
movl %r9d,%r13d
|
|
addl %ebx,%r14d
|
|
vpsrlq $2,%xmm7,%xmm7
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%ebx
|
|
movl %r10d,%r12d
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
xorl %r9d,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %r11d,%r12d
|
|
vpshufd $232,%xmm6,%xmm6
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %ebx,%r14d
|
|
andl %r9d,%r12d
|
|
vpslldq $8,%xmm6,%xmm6
|
|
vpor %xmm11,%xmm8,%xmm8
|
|
vaesenclast %xmm10,%xmm9,%xmm11
|
|
vmovdqu 0-128(%rdi),%xmm10
|
|
xorl %r9d,%r13d
|
|
addl 60(%rsp),%eax
|
|
movl %ebx,%esi
|
|
vpaddd %xmm6,%xmm3,%xmm3
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %r11d,%r12d
|
|
xorl %ecx,%esi
|
|
vpaddd 96(%rbp),%xmm3,%xmm6
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%eax
|
|
andl %esi,%r15d
|
|
xorl %ebx,%r14d
|
|
addl %r13d,%eax
|
|
xorl %ecx,%r15d
|
|
addl %eax,%r8d
|
|
shrdl $2,%r14d,%r14d
|
|
addl %r15d,%eax
|
|
movl %r8d,%r13d
|
|
addl %eax,%r14d
|
|
vmovdqa %xmm6,48(%rsp)
|
|
movq 64+0(%rsp),%r12
|
|
vpand %xmm14,%xmm11,%xmm11
|
|
movq 64+8(%rsp),%r15
|
|
vpor %xmm11,%xmm8,%xmm8
|
|
vmovdqu %xmm8,(%r15,%r12,1)
|
|
leaq 16(%r12),%r12
|
|
cmpb $0,131(%rbp)
|
|
jne .Lavx_00_47
|
|
vmovdqu (%r12),%xmm9
|
|
movq %r12,64+0(%rsp)
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%eax
|
|
movl %r9d,%r12d
|
|
xorl %r8d,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %r10d,%r12d
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %eax,%r14d
|
|
andl %r8d,%r12d
|
|
vpxor %xmm10,%xmm9,%xmm9
|
|
vmovdqu 16-128(%rdi),%xmm10
|
|
xorl %r8d,%r13d
|
|
addl 0(%rsp),%r11d
|
|
movl %eax,%r15d
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %r10d,%r12d
|
|
xorl %ebx,%r15d
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%r11d
|
|
andl %r15d,%esi
|
|
xorl %eax,%r14d
|
|
addl %r13d,%r11d
|
|
xorl %ebx,%esi
|
|
addl %r11d,%edx
|
|
shrdl $2,%r14d,%r14d
|
|
addl %esi,%r11d
|
|
movl %edx,%r13d
|
|
addl %r11d,%r14d
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%r11d
|
|
movl %r8d,%r12d
|
|
xorl %edx,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %r9d,%r12d
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %r11d,%r14d
|
|
andl %edx,%r12d
|
|
vpxor %xmm8,%xmm9,%xmm9
|
|
xorl %edx,%r13d
|
|
addl 4(%rsp),%r10d
|
|
movl %r11d,%esi
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %r9d,%r12d
|
|
xorl %eax,%esi
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%r10d
|
|
andl %esi,%r15d
|
|
xorl %r11d,%r14d
|
|
addl %r13d,%r10d
|
|
xorl %eax,%r15d
|
|
addl %r10d,%ecx
|
|
shrdl $2,%r14d,%r14d
|
|
addl %r15d,%r10d
|
|
movl %ecx,%r13d
|
|
addl %r10d,%r14d
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%r10d
|
|
movl %edx,%r12d
|
|
xorl %ecx,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %r8d,%r12d
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %r10d,%r14d
|
|
andl %ecx,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 32-128(%rdi),%xmm10
|
|
xorl %ecx,%r13d
|
|
addl 8(%rsp),%r9d
|
|
movl %r10d,%r15d
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %r8d,%r12d
|
|
xorl %r11d,%r15d
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%r9d
|
|
andl %r15d,%esi
|
|
xorl %r10d,%r14d
|
|
addl %r13d,%r9d
|
|
xorl %r11d,%esi
|
|
addl %r9d,%ebx
|
|
shrdl $2,%r14d,%r14d
|
|
addl %esi,%r9d
|
|
movl %ebx,%r13d
|
|
addl %r9d,%r14d
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%r9d
|
|
movl %ecx,%r12d
|
|
xorl %ebx,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %edx,%r12d
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %r9d,%r14d
|
|
andl %ebx,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 48-128(%rdi),%xmm10
|
|
xorl %ebx,%r13d
|
|
addl 12(%rsp),%r8d
|
|
movl %r9d,%esi
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %edx,%r12d
|
|
xorl %r10d,%esi
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%r8d
|
|
andl %esi,%r15d
|
|
xorl %r9d,%r14d
|
|
addl %r13d,%r8d
|
|
xorl %r10d,%r15d
|
|
addl %r8d,%eax
|
|
shrdl $2,%r14d,%r14d
|
|
addl %r15d,%r8d
|
|
movl %eax,%r13d
|
|
addl %r8d,%r14d
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%r8d
|
|
movl %ebx,%r12d
|
|
xorl %eax,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %ecx,%r12d
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %r8d,%r14d
|
|
andl %eax,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 64-128(%rdi),%xmm10
|
|
xorl %eax,%r13d
|
|
addl 16(%rsp),%edx
|
|
movl %r8d,%r15d
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %ecx,%r12d
|
|
xorl %r9d,%r15d
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%edx
|
|
andl %r15d,%esi
|
|
xorl %r8d,%r14d
|
|
addl %r13d,%edx
|
|
xorl %r9d,%esi
|
|
addl %edx,%r11d
|
|
shrdl $2,%r14d,%r14d
|
|
addl %esi,%edx
|
|
movl %r11d,%r13d
|
|
addl %edx,%r14d
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%edx
|
|
movl %eax,%r12d
|
|
xorl %r11d,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %ebx,%r12d
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %edx,%r14d
|
|
andl %r11d,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 80-128(%rdi),%xmm10
|
|
xorl %r11d,%r13d
|
|
addl 20(%rsp),%ecx
|
|
movl %edx,%esi
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %ebx,%r12d
|
|
xorl %r8d,%esi
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%ecx
|
|
andl %esi,%r15d
|
|
xorl %edx,%r14d
|
|
addl %r13d,%ecx
|
|
xorl %r8d,%r15d
|
|
addl %ecx,%r10d
|
|
shrdl $2,%r14d,%r14d
|
|
addl %r15d,%ecx
|
|
movl %r10d,%r13d
|
|
addl %ecx,%r14d
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%ecx
|
|
movl %r11d,%r12d
|
|
xorl %r10d,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %eax,%r12d
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %ecx,%r14d
|
|
andl %r10d,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 96-128(%rdi),%xmm10
|
|
xorl %r10d,%r13d
|
|
addl 24(%rsp),%ebx
|
|
movl %ecx,%r15d
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %eax,%r12d
|
|
xorl %edx,%r15d
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%ebx
|
|
andl %r15d,%esi
|
|
xorl %ecx,%r14d
|
|
addl %r13d,%ebx
|
|
xorl %edx,%esi
|
|
addl %ebx,%r9d
|
|
shrdl $2,%r14d,%r14d
|
|
addl %esi,%ebx
|
|
movl %r9d,%r13d
|
|
addl %ebx,%r14d
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%ebx
|
|
movl %r10d,%r12d
|
|
xorl %r9d,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %r11d,%r12d
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %ebx,%r14d
|
|
andl %r9d,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 112-128(%rdi),%xmm10
|
|
xorl %r9d,%r13d
|
|
addl 28(%rsp),%eax
|
|
movl %ebx,%esi
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %r11d,%r12d
|
|
xorl %ecx,%esi
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%eax
|
|
andl %esi,%r15d
|
|
xorl %ebx,%r14d
|
|
addl %r13d,%eax
|
|
xorl %ecx,%r15d
|
|
addl %eax,%r8d
|
|
shrdl $2,%r14d,%r14d
|
|
addl %r15d,%eax
|
|
movl %r8d,%r13d
|
|
addl %eax,%r14d
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%eax
|
|
movl %r9d,%r12d
|
|
xorl %r8d,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %r10d,%r12d
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %eax,%r14d
|
|
andl %r8d,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 128-128(%rdi),%xmm10
|
|
xorl %r8d,%r13d
|
|
addl 32(%rsp),%r11d
|
|
movl %eax,%r15d
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %r10d,%r12d
|
|
xorl %ebx,%r15d
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%r11d
|
|
andl %r15d,%esi
|
|
xorl %eax,%r14d
|
|
addl %r13d,%r11d
|
|
xorl %ebx,%esi
|
|
addl %r11d,%edx
|
|
shrdl $2,%r14d,%r14d
|
|
addl %esi,%r11d
|
|
movl %edx,%r13d
|
|
addl %r11d,%r14d
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%r11d
|
|
movl %r8d,%r12d
|
|
xorl %edx,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %r9d,%r12d
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %r11d,%r14d
|
|
andl %edx,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 144-128(%rdi),%xmm10
|
|
xorl %edx,%r13d
|
|
addl 36(%rsp),%r10d
|
|
movl %r11d,%esi
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %r9d,%r12d
|
|
xorl %eax,%esi
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%r10d
|
|
andl %esi,%r15d
|
|
xorl %r11d,%r14d
|
|
addl %r13d,%r10d
|
|
xorl %eax,%r15d
|
|
addl %r10d,%ecx
|
|
shrdl $2,%r14d,%r14d
|
|
addl %r15d,%r10d
|
|
movl %ecx,%r13d
|
|
addl %r10d,%r14d
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%r10d
|
|
movl %edx,%r12d
|
|
xorl %ecx,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %r8d,%r12d
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %r10d,%r14d
|
|
andl %ecx,%r12d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 160-128(%rdi),%xmm10
|
|
xorl %ecx,%r13d
|
|
addl 40(%rsp),%r9d
|
|
movl %r10d,%r15d
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %r8d,%r12d
|
|
xorl %r11d,%r15d
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%r9d
|
|
andl %r15d,%esi
|
|
xorl %r10d,%r14d
|
|
addl %r13d,%r9d
|
|
xorl %r11d,%esi
|
|
addl %r9d,%ebx
|
|
shrdl $2,%r14d,%r14d
|
|
addl %esi,%r9d
|
|
movl %ebx,%r13d
|
|
addl %r9d,%r14d
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%r9d
|
|
movl %ecx,%r12d
|
|
xorl %ebx,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %edx,%r12d
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %r9d,%r14d
|
|
andl %ebx,%r12d
|
|
vaesenclast %xmm10,%xmm9,%xmm11
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 176-128(%rdi),%xmm10
|
|
xorl %ebx,%r13d
|
|
addl 44(%rsp),%r8d
|
|
movl %r9d,%esi
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %edx,%r12d
|
|
xorl %r10d,%esi
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%r8d
|
|
andl %esi,%r15d
|
|
xorl %r9d,%r14d
|
|
addl %r13d,%r8d
|
|
xorl %r10d,%r15d
|
|
addl %r8d,%eax
|
|
shrdl $2,%r14d,%r14d
|
|
addl %r15d,%r8d
|
|
movl %eax,%r13d
|
|
addl %r8d,%r14d
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%r8d
|
|
movl %ebx,%r12d
|
|
xorl %eax,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %ecx,%r12d
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %r8d,%r14d
|
|
andl %eax,%r12d
|
|
vpand %xmm12,%xmm11,%xmm8
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 192-128(%rdi),%xmm10
|
|
xorl %eax,%r13d
|
|
addl 48(%rsp),%edx
|
|
movl %r8d,%r15d
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %ecx,%r12d
|
|
xorl %r9d,%r15d
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%edx
|
|
andl %r15d,%esi
|
|
xorl %r8d,%r14d
|
|
addl %r13d,%edx
|
|
xorl %r9d,%esi
|
|
addl %edx,%r11d
|
|
shrdl $2,%r14d,%r14d
|
|
addl %esi,%edx
|
|
movl %r11d,%r13d
|
|
addl %edx,%r14d
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%edx
|
|
movl %eax,%r12d
|
|
xorl %r11d,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %ebx,%r12d
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %edx,%r14d
|
|
andl %r11d,%r12d
|
|
vaesenclast %xmm10,%xmm9,%xmm11
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 208-128(%rdi),%xmm10
|
|
xorl %r11d,%r13d
|
|
addl 52(%rsp),%ecx
|
|
movl %edx,%esi
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %ebx,%r12d
|
|
xorl %r8d,%esi
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%ecx
|
|
andl %esi,%r15d
|
|
xorl %edx,%r14d
|
|
addl %r13d,%ecx
|
|
xorl %r8d,%r15d
|
|
addl %ecx,%r10d
|
|
shrdl $2,%r14d,%r14d
|
|
addl %r15d,%ecx
|
|
movl %r10d,%r13d
|
|
addl %ecx,%r14d
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%ecx
|
|
movl %r11d,%r12d
|
|
xorl %r10d,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %eax,%r12d
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %ecx,%r14d
|
|
andl %r10d,%r12d
|
|
vpand %xmm13,%xmm11,%xmm11
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 224-128(%rdi),%xmm10
|
|
xorl %r10d,%r13d
|
|
addl 56(%rsp),%ebx
|
|
movl %ecx,%r15d
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %eax,%r12d
|
|
xorl %edx,%r15d
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%ebx
|
|
andl %r15d,%esi
|
|
xorl %ecx,%r14d
|
|
addl %r13d,%ebx
|
|
xorl %edx,%esi
|
|
addl %ebx,%r9d
|
|
shrdl $2,%r14d,%r14d
|
|
addl %esi,%ebx
|
|
movl %r9d,%r13d
|
|
addl %ebx,%r14d
|
|
shrdl $14,%r13d,%r13d
|
|
movl %r14d,%ebx
|
|
movl %r10d,%r12d
|
|
xorl %r9d,%r13d
|
|
shrdl $9,%r14d,%r14d
|
|
xorl %r11d,%r12d
|
|
shrdl $5,%r13d,%r13d
|
|
xorl %ebx,%r14d
|
|
andl %r9d,%r12d
|
|
vpor %xmm11,%xmm8,%xmm8
|
|
vaesenclast %xmm10,%xmm9,%xmm11
|
|
vmovdqu 0-128(%rdi),%xmm10
|
|
xorl %r9d,%r13d
|
|
addl 60(%rsp),%eax
|
|
movl %ebx,%esi
|
|
shrdl $11,%r14d,%r14d
|
|
xorl %r11d,%r12d
|
|
xorl %ecx,%esi
|
|
shrdl $6,%r13d,%r13d
|
|
addl %r12d,%eax
|
|
andl %esi,%r15d
|
|
xorl %ebx,%r14d
|
|
addl %r13d,%eax
|
|
xorl %ecx,%r15d
|
|
addl %eax,%r8d
|
|
shrdl $2,%r14d,%r14d
|
|
addl %r15d,%eax
|
|
movl %r8d,%r13d
|
|
addl %eax,%r14d
|
|
movq 64+0(%rsp),%r12
|
|
movq 64+8(%rsp),%r13
|
|
movq 64+40(%rsp),%r15
|
|
movq 64+48(%rsp),%rsi
|
|
|
|
vpand %xmm14,%xmm11,%xmm11
|
|
movl %r14d,%eax
|
|
vpor %xmm11,%xmm8,%xmm8
|
|
vmovdqu %xmm8,(%r12,%r13,1)
|
|
leaq 16(%r12),%r12
|
|
|
|
addl 0(%r15),%eax
|
|
addl 4(%r15),%ebx
|
|
addl 8(%r15),%ecx
|
|
addl 12(%r15),%edx
|
|
addl 16(%r15),%r8d
|
|
addl 20(%r15),%r9d
|
|
addl 24(%r15),%r10d
|
|
addl 28(%r15),%r11d
|
|
|
|
cmpq 64+16(%rsp),%r12
|
|
|
|
movl %eax,0(%r15)
|
|
movl %ebx,4(%r15)
|
|
movl %ecx,8(%r15)
|
|
movl %edx,12(%r15)
|
|
movl %r8d,16(%r15)
|
|
movl %r9d,20(%r15)
|
|
movl %r10d,24(%r15)
|
|
movl %r11d,28(%r15)
|
|
jb .Lloop_avx
|
|
|
|
movq 64+32(%rsp),%r8
|
|
movq 120(%rsp),%rsi
|
|
.cfi_def_cfa %rsi,8
|
|
vmovdqu %xmm8,(%r8)
|
|
vzeroall
|
|
movq -48(%rsi),%r15
|
|
.cfi_restore %r15
|
|
movq -40(%rsi),%r14
|
|
.cfi_restore %r14
|
|
movq -32(%rsi),%r13
|
|
.cfi_restore %r13
|
|
movq -24(%rsi),%r12
|
|
.cfi_restore %r12
|
|
movq -16(%rsi),%rbp
|
|
.cfi_restore %rbp
|
|
movq -8(%rsi),%rbx
|
|
.cfi_restore %rbx
|
|
leaq (%rsi),%rsp
|
|
.cfi_def_cfa_register %rsp
|
|
.Lepilogue_avx:
|
|
.byte 0xf3,0xc3
|
|
.cfi_endproc
|
|
.size aesni_cbc_sha256_enc_avx,.-aesni_cbc_sha256_enc_avx
|
|
.type aesni_cbc_sha256_enc_avx2,@function
|
|
.align 64
|
|
aesni_cbc_sha256_enc_avx2:
|
|
.cfi_startproc
|
|
.Lavx2_shortcut:
|
|
movq 8(%rsp),%r10
|
|
movq %rsp,%rax
|
|
.cfi_def_cfa_register %rax
|
|
pushq %rbx
|
|
.cfi_offset %rbx,-16
|
|
pushq %rbp
|
|
.cfi_offset %rbp,-24
|
|
pushq %r12
|
|
.cfi_offset %r12,-32
|
|
pushq %r13
|
|
.cfi_offset %r13,-40
|
|
pushq %r14
|
|
.cfi_offset %r14,-48
|
|
pushq %r15
|
|
.cfi_offset %r15,-56
|
|
subq $576,%rsp
|
|
andq $-1024,%rsp
|
|
addq $448,%rsp
|
|
|
|
shlq $6,%rdx
|
|
subq %rdi,%rsi
|
|
subq %rdi,%r10
|
|
addq %rdi,%rdx
|
|
|
|
|
|
|
|
movq %rdx,64+16(%rsp)
|
|
|
|
movq %r8,64+32(%rsp)
|
|
movq %r9,64+40(%rsp)
|
|
movq %r10,64+48(%rsp)
|
|
movq %rax,120(%rsp)
|
|
.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08
|
|
.Lprologue_avx2:
|
|
vzeroall
|
|
|
|
movq %rdi,%r13
|
|
vpinsrq $1,%rsi,%xmm15,%xmm15
|
|
leaq 128(%rcx),%rdi
|
|
leaq K256+544(%rip),%r12
|
|
movl 240-128(%rdi),%r14d
|
|
movq %r9,%r15
|
|
movq %r10,%rsi
|
|
vmovdqu (%r8),%xmm8
|
|
leaq -9(%r14),%r14
|
|
|
|
vmovdqa 0(%r12,%r14,8),%xmm14
|
|
vmovdqa 16(%r12,%r14,8),%xmm13
|
|
vmovdqa 32(%r12,%r14,8),%xmm12
|
|
|
|
subq $-64,%r13
|
|
movl 0(%r15),%eax
|
|
leaq (%rsi,%r13,1),%r12
|
|
movl 4(%r15),%ebx
|
|
cmpq %rdx,%r13
|
|
movl 8(%r15),%ecx
|
|
cmoveq %rsp,%r12
|
|
movl 12(%r15),%edx
|
|
movl 16(%r15),%r8d
|
|
movl 20(%r15),%r9d
|
|
movl 24(%r15),%r10d
|
|
movl 28(%r15),%r11d
|
|
vmovdqu 0-128(%rdi),%xmm10
|
|
jmp .Loop_avx2
|
|
.align 16
|
|
.Loop_avx2:
|
|
vmovdqa K256+512(%rip),%ymm7
|
|
vmovdqu -64+0(%rsi,%r13,1),%xmm0
|
|
vmovdqu -64+16(%rsi,%r13,1),%xmm1
|
|
vmovdqu -64+32(%rsi,%r13,1),%xmm2
|
|
vmovdqu -64+48(%rsi,%r13,1),%xmm3
|
|
|
|
vinserti128 $1,(%r12),%ymm0,%ymm0
|
|
vinserti128 $1,16(%r12),%ymm1,%ymm1
|
|
vpshufb %ymm7,%ymm0,%ymm0
|
|
vinserti128 $1,32(%r12),%ymm2,%ymm2
|
|
vpshufb %ymm7,%ymm1,%ymm1
|
|
vinserti128 $1,48(%r12),%ymm3,%ymm3
|
|
|
|
leaq K256(%rip),%rbp
|
|
vpshufb %ymm7,%ymm2,%ymm2
|
|
leaq -64(%r13),%r13
|
|
vpaddd 0(%rbp),%ymm0,%ymm4
|
|
vpshufb %ymm7,%ymm3,%ymm3
|
|
vpaddd 32(%rbp),%ymm1,%ymm5
|
|
vpaddd 64(%rbp),%ymm2,%ymm6
|
|
vpaddd 96(%rbp),%ymm3,%ymm7
|
|
vmovdqa %ymm4,0(%rsp)
|
|
xorl %r14d,%r14d
|
|
vmovdqa %ymm5,32(%rsp)
|
|
leaq -64(%rsp),%rsp
|
|
movl %ebx,%esi
|
|
vmovdqa %ymm6,0(%rsp)
|
|
xorl %ecx,%esi
|
|
vmovdqa %ymm7,32(%rsp)
|
|
movl %r9d,%r12d
|
|
subq $-32*4,%rbp
|
|
jmp .Lavx2_00_47
|
|
|
|
.align 16
|
|
.Lavx2_00_47:
|
|
vmovdqu (%r13),%xmm9
|
|
vpinsrq $0,%r13,%xmm15,%xmm15
|
|
leaq -64(%rsp),%rsp
|
|
vpalignr $4,%ymm0,%ymm1,%ymm4
|
|
addl 0+128(%rsp),%r11d
|
|
andl %r8d,%r12d
|
|
rorxl $25,%r8d,%r13d
|
|
vpalignr $4,%ymm2,%ymm3,%ymm7
|
|
rorxl $11,%r8d,%r15d
|
|
leal (%rax,%r14,1),%eax
|
|
leal (%r11,%r12,1),%r11d
|
|
vpsrld $7,%ymm4,%ymm6
|
|
andnl %r10d,%r8d,%r12d
|
|
xorl %r15d,%r13d
|
|
rorxl $6,%r8d,%r14d
|
|
vpaddd %ymm7,%ymm0,%ymm0
|
|
leal (%r11,%r12,1),%r11d
|
|
xorl %r14d,%r13d
|
|
movl %eax,%r15d
|
|
vpsrld $3,%ymm4,%ymm7
|
|
rorxl $22,%eax,%r12d
|
|
leal (%r11,%r13,1),%r11d
|
|
xorl %ebx,%r15d
|
|
vpslld $14,%ymm4,%ymm5
|
|
rorxl $13,%eax,%r14d
|
|
rorxl $2,%eax,%r13d
|
|
leal (%rdx,%r11,1),%edx
|
|
vpxor %ymm6,%ymm7,%ymm4
|
|
andl %r15d,%esi
|
|
vpxor %xmm10,%xmm9,%xmm9
|
|
vmovdqu 16-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %ebx,%esi
|
|
vpshufd $250,%ymm3,%ymm7
|
|
xorl %r13d,%r14d
|
|
leal (%r11,%rsi,1),%r11d
|
|
movl %r8d,%r12d
|
|
vpsrld $11,%ymm6,%ymm6
|
|
addl 4+128(%rsp),%r10d
|
|
andl %edx,%r12d
|
|
rorxl $25,%edx,%r13d
|
|
vpxor %ymm5,%ymm4,%ymm4
|
|
rorxl $11,%edx,%esi
|
|
leal (%r11,%r14,1),%r11d
|
|
leal (%r10,%r12,1),%r10d
|
|
vpslld $11,%ymm5,%ymm5
|
|
andnl %r9d,%edx,%r12d
|
|
xorl %esi,%r13d
|
|
rorxl $6,%edx,%r14d
|
|
vpxor %ymm6,%ymm4,%ymm4
|
|
leal (%r10,%r12,1),%r10d
|
|
xorl %r14d,%r13d
|
|
movl %r11d,%esi
|
|
vpsrld $10,%ymm7,%ymm6
|
|
rorxl $22,%r11d,%r12d
|
|
leal (%r10,%r13,1),%r10d
|
|
xorl %eax,%esi
|
|
vpxor %ymm5,%ymm4,%ymm4
|
|
rorxl $13,%r11d,%r14d
|
|
rorxl $2,%r11d,%r13d
|
|
leal (%rcx,%r10,1),%ecx
|
|
vpsrlq $17,%ymm7,%ymm7
|
|
andl %esi,%r15d
|
|
vpxor %xmm8,%xmm9,%xmm9
|
|
xorl %r12d,%r14d
|
|
xorl %eax,%r15d
|
|
vpaddd %ymm4,%ymm0,%ymm0
|
|
xorl %r13d,%r14d
|
|
leal (%r10,%r15,1),%r10d
|
|
movl %edx,%r12d
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
addl 8+128(%rsp),%r9d
|
|
andl %ecx,%r12d
|
|
rorxl $25,%ecx,%r13d
|
|
vpsrlq $2,%ymm7,%ymm7
|
|
rorxl $11,%ecx,%r15d
|
|
leal (%r10,%r14,1),%r10d
|
|
leal (%r9,%r12,1),%r9d
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
andnl %r8d,%ecx,%r12d
|
|
xorl %r15d,%r13d
|
|
rorxl $6,%ecx,%r14d
|
|
vpshufd $132,%ymm6,%ymm6
|
|
leal (%r9,%r12,1),%r9d
|
|
xorl %r14d,%r13d
|
|
movl %r10d,%r15d
|
|
vpsrldq $8,%ymm6,%ymm6
|
|
rorxl $22,%r10d,%r12d
|
|
leal (%r9,%r13,1),%r9d
|
|
xorl %r11d,%r15d
|
|
vpaddd %ymm6,%ymm0,%ymm0
|
|
rorxl $13,%r10d,%r14d
|
|
rorxl $2,%r10d,%r13d
|
|
leal (%rbx,%r9,1),%ebx
|
|
vpshufd $80,%ymm0,%ymm7
|
|
andl %r15d,%esi
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 32-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %r11d,%esi
|
|
vpsrld $10,%ymm7,%ymm6
|
|
xorl %r13d,%r14d
|
|
leal (%r9,%rsi,1),%r9d
|
|
movl %ecx,%r12d
|
|
vpsrlq $17,%ymm7,%ymm7
|
|
addl 12+128(%rsp),%r8d
|
|
andl %ebx,%r12d
|
|
rorxl $25,%ebx,%r13d
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
rorxl $11,%ebx,%esi
|
|
leal (%r9,%r14,1),%r9d
|
|
leal (%r8,%r12,1),%r8d
|
|
vpsrlq $2,%ymm7,%ymm7
|
|
andnl %edx,%ebx,%r12d
|
|
xorl %esi,%r13d
|
|
rorxl $6,%ebx,%r14d
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
leal (%r8,%r12,1),%r8d
|
|
xorl %r14d,%r13d
|
|
movl %r9d,%esi
|
|
vpshufd $232,%ymm6,%ymm6
|
|
rorxl $22,%r9d,%r12d
|
|
leal (%r8,%r13,1),%r8d
|
|
xorl %r10d,%esi
|
|
vpslldq $8,%ymm6,%ymm6
|
|
rorxl $13,%r9d,%r14d
|
|
rorxl $2,%r9d,%r13d
|
|
leal (%rax,%r8,1),%eax
|
|
vpaddd %ymm6,%ymm0,%ymm0
|
|
andl %esi,%r15d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 48-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %r10d,%r15d
|
|
vpaddd 0(%rbp),%ymm0,%ymm6
|
|
xorl %r13d,%r14d
|
|
leal (%r8,%r15,1),%r8d
|
|
movl %ebx,%r12d
|
|
vmovdqa %ymm6,0(%rsp)
|
|
vpalignr $4,%ymm1,%ymm2,%ymm4
|
|
addl 32+128(%rsp),%edx
|
|
andl %eax,%r12d
|
|
rorxl $25,%eax,%r13d
|
|
vpalignr $4,%ymm3,%ymm0,%ymm7
|
|
rorxl $11,%eax,%r15d
|
|
leal (%r8,%r14,1),%r8d
|
|
leal (%rdx,%r12,1),%edx
|
|
vpsrld $7,%ymm4,%ymm6
|
|
andnl %ecx,%eax,%r12d
|
|
xorl %r15d,%r13d
|
|
rorxl $6,%eax,%r14d
|
|
vpaddd %ymm7,%ymm1,%ymm1
|
|
leal (%rdx,%r12,1),%edx
|
|
xorl %r14d,%r13d
|
|
movl %r8d,%r15d
|
|
vpsrld $3,%ymm4,%ymm7
|
|
rorxl $22,%r8d,%r12d
|
|
leal (%rdx,%r13,1),%edx
|
|
xorl %r9d,%r15d
|
|
vpslld $14,%ymm4,%ymm5
|
|
rorxl $13,%r8d,%r14d
|
|
rorxl $2,%r8d,%r13d
|
|
leal (%r11,%rdx,1),%r11d
|
|
vpxor %ymm6,%ymm7,%ymm4
|
|
andl %r15d,%esi
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 64-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %r9d,%esi
|
|
vpshufd $250,%ymm0,%ymm7
|
|
xorl %r13d,%r14d
|
|
leal (%rdx,%rsi,1),%edx
|
|
movl %eax,%r12d
|
|
vpsrld $11,%ymm6,%ymm6
|
|
addl 36+128(%rsp),%ecx
|
|
andl %r11d,%r12d
|
|
rorxl $25,%r11d,%r13d
|
|
vpxor %ymm5,%ymm4,%ymm4
|
|
rorxl $11,%r11d,%esi
|
|
leal (%rdx,%r14,1),%edx
|
|
leal (%rcx,%r12,1),%ecx
|
|
vpslld $11,%ymm5,%ymm5
|
|
andnl %ebx,%r11d,%r12d
|
|
xorl %esi,%r13d
|
|
rorxl $6,%r11d,%r14d
|
|
vpxor %ymm6,%ymm4,%ymm4
|
|
leal (%rcx,%r12,1),%ecx
|
|
xorl %r14d,%r13d
|
|
movl %edx,%esi
|
|
vpsrld $10,%ymm7,%ymm6
|
|
rorxl $22,%edx,%r12d
|
|
leal (%rcx,%r13,1),%ecx
|
|
xorl %r8d,%esi
|
|
vpxor %ymm5,%ymm4,%ymm4
|
|
rorxl $13,%edx,%r14d
|
|
rorxl $2,%edx,%r13d
|
|
leal (%r10,%rcx,1),%r10d
|
|
vpsrlq $17,%ymm7,%ymm7
|
|
andl %esi,%r15d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 80-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %r8d,%r15d
|
|
vpaddd %ymm4,%ymm1,%ymm1
|
|
xorl %r13d,%r14d
|
|
leal (%rcx,%r15,1),%ecx
|
|
movl %r11d,%r12d
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
addl 40+128(%rsp),%ebx
|
|
andl %r10d,%r12d
|
|
rorxl $25,%r10d,%r13d
|
|
vpsrlq $2,%ymm7,%ymm7
|
|
rorxl $11,%r10d,%r15d
|
|
leal (%rcx,%r14,1),%ecx
|
|
leal (%rbx,%r12,1),%ebx
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
andnl %eax,%r10d,%r12d
|
|
xorl %r15d,%r13d
|
|
rorxl $6,%r10d,%r14d
|
|
vpshufd $132,%ymm6,%ymm6
|
|
leal (%rbx,%r12,1),%ebx
|
|
xorl %r14d,%r13d
|
|
movl %ecx,%r15d
|
|
vpsrldq $8,%ymm6,%ymm6
|
|
rorxl $22,%ecx,%r12d
|
|
leal (%rbx,%r13,1),%ebx
|
|
xorl %edx,%r15d
|
|
vpaddd %ymm6,%ymm1,%ymm1
|
|
rorxl $13,%ecx,%r14d
|
|
rorxl $2,%ecx,%r13d
|
|
leal (%r9,%rbx,1),%r9d
|
|
vpshufd $80,%ymm1,%ymm7
|
|
andl %r15d,%esi
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 96-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %edx,%esi
|
|
vpsrld $10,%ymm7,%ymm6
|
|
xorl %r13d,%r14d
|
|
leal (%rbx,%rsi,1),%ebx
|
|
movl %r10d,%r12d
|
|
vpsrlq $17,%ymm7,%ymm7
|
|
addl 44+128(%rsp),%eax
|
|
andl %r9d,%r12d
|
|
rorxl $25,%r9d,%r13d
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
rorxl $11,%r9d,%esi
|
|
leal (%rbx,%r14,1),%ebx
|
|
leal (%rax,%r12,1),%eax
|
|
vpsrlq $2,%ymm7,%ymm7
|
|
andnl %r11d,%r9d,%r12d
|
|
xorl %esi,%r13d
|
|
rorxl $6,%r9d,%r14d
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
leal (%rax,%r12,1),%eax
|
|
xorl %r14d,%r13d
|
|
movl %ebx,%esi
|
|
vpshufd $232,%ymm6,%ymm6
|
|
rorxl $22,%ebx,%r12d
|
|
leal (%rax,%r13,1),%eax
|
|
xorl %ecx,%esi
|
|
vpslldq $8,%ymm6,%ymm6
|
|
rorxl $13,%ebx,%r14d
|
|
rorxl $2,%ebx,%r13d
|
|
leal (%r8,%rax,1),%r8d
|
|
vpaddd %ymm6,%ymm1,%ymm1
|
|
andl %esi,%r15d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 112-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %ecx,%r15d
|
|
vpaddd 32(%rbp),%ymm1,%ymm6
|
|
xorl %r13d,%r14d
|
|
leal (%rax,%r15,1),%eax
|
|
movl %r9d,%r12d
|
|
vmovdqa %ymm6,32(%rsp)
|
|
leaq -64(%rsp),%rsp
|
|
vpalignr $4,%ymm2,%ymm3,%ymm4
|
|
addl 0+128(%rsp),%r11d
|
|
andl %r8d,%r12d
|
|
rorxl $25,%r8d,%r13d
|
|
vpalignr $4,%ymm0,%ymm1,%ymm7
|
|
rorxl $11,%r8d,%r15d
|
|
leal (%rax,%r14,1),%eax
|
|
leal (%r11,%r12,1),%r11d
|
|
vpsrld $7,%ymm4,%ymm6
|
|
andnl %r10d,%r8d,%r12d
|
|
xorl %r15d,%r13d
|
|
rorxl $6,%r8d,%r14d
|
|
vpaddd %ymm7,%ymm2,%ymm2
|
|
leal (%r11,%r12,1),%r11d
|
|
xorl %r14d,%r13d
|
|
movl %eax,%r15d
|
|
vpsrld $3,%ymm4,%ymm7
|
|
rorxl $22,%eax,%r12d
|
|
leal (%r11,%r13,1),%r11d
|
|
xorl %ebx,%r15d
|
|
vpslld $14,%ymm4,%ymm5
|
|
rorxl $13,%eax,%r14d
|
|
rorxl $2,%eax,%r13d
|
|
leal (%rdx,%r11,1),%edx
|
|
vpxor %ymm6,%ymm7,%ymm4
|
|
andl %r15d,%esi
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 128-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %ebx,%esi
|
|
vpshufd $250,%ymm1,%ymm7
|
|
xorl %r13d,%r14d
|
|
leal (%r11,%rsi,1),%r11d
|
|
movl %r8d,%r12d
|
|
vpsrld $11,%ymm6,%ymm6
|
|
addl 4+128(%rsp),%r10d
|
|
andl %edx,%r12d
|
|
rorxl $25,%edx,%r13d
|
|
vpxor %ymm5,%ymm4,%ymm4
|
|
rorxl $11,%edx,%esi
|
|
leal (%r11,%r14,1),%r11d
|
|
leal (%r10,%r12,1),%r10d
|
|
vpslld $11,%ymm5,%ymm5
|
|
andnl %r9d,%edx,%r12d
|
|
xorl %esi,%r13d
|
|
rorxl $6,%edx,%r14d
|
|
vpxor %ymm6,%ymm4,%ymm4
|
|
leal (%r10,%r12,1),%r10d
|
|
xorl %r14d,%r13d
|
|
movl %r11d,%esi
|
|
vpsrld $10,%ymm7,%ymm6
|
|
rorxl $22,%r11d,%r12d
|
|
leal (%r10,%r13,1),%r10d
|
|
xorl %eax,%esi
|
|
vpxor %ymm5,%ymm4,%ymm4
|
|
rorxl $13,%r11d,%r14d
|
|
rorxl $2,%r11d,%r13d
|
|
leal (%rcx,%r10,1),%ecx
|
|
vpsrlq $17,%ymm7,%ymm7
|
|
andl %esi,%r15d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 144-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %eax,%r15d
|
|
vpaddd %ymm4,%ymm2,%ymm2
|
|
xorl %r13d,%r14d
|
|
leal (%r10,%r15,1),%r10d
|
|
movl %edx,%r12d
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
addl 8+128(%rsp),%r9d
|
|
andl %ecx,%r12d
|
|
rorxl $25,%ecx,%r13d
|
|
vpsrlq $2,%ymm7,%ymm7
|
|
rorxl $11,%ecx,%r15d
|
|
leal (%r10,%r14,1),%r10d
|
|
leal (%r9,%r12,1),%r9d
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
andnl %r8d,%ecx,%r12d
|
|
xorl %r15d,%r13d
|
|
rorxl $6,%ecx,%r14d
|
|
vpshufd $132,%ymm6,%ymm6
|
|
leal (%r9,%r12,1),%r9d
|
|
xorl %r14d,%r13d
|
|
movl %r10d,%r15d
|
|
vpsrldq $8,%ymm6,%ymm6
|
|
rorxl $22,%r10d,%r12d
|
|
leal (%r9,%r13,1),%r9d
|
|
xorl %r11d,%r15d
|
|
vpaddd %ymm6,%ymm2,%ymm2
|
|
rorxl $13,%r10d,%r14d
|
|
rorxl $2,%r10d,%r13d
|
|
leal (%rbx,%r9,1),%ebx
|
|
vpshufd $80,%ymm2,%ymm7
|
|
andl %r15d,%esi
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 160-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %r11d,%esi
|
|
vpsrld $10,%ymm7,%ymm6
|
|
xorl %r13d,%r14d
|
|
leal (%r9,%rsi,1),%r9d
|
|
movl %ecx,%r12d
|
|
vpsrlq $17,%ymm7,%ymm7
|
|
addl 12+128(%rsp),%r8d
|
|
andl %ebx,%r12d
|
|
rorxl $25,%ebx,%r13d
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
rorxl $11,%ebx,%esi
|
|
leal (%r9,%r14,1),%r9d
|
|
leal (%r8,%r12,1),%r8d
|
|
vpsrlq $2,%ymm7,%ymm7
|
|
andnl %edx,%ebx,%r12d
|
|
xorl %esi,%r13d
|
|
rorxl $6,%ebx,%r14d
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
leal (%r8,%r12,1),%r8d
|
|
xorl %r14d,%r13d
|
|
movl %r9d,%esi
|
|
vpshufd $232,%ymm6,%ymm6
|
|
rorxl $22,%r9d,%r12d
|
|
leal (%r8,%r13,1),%r8d
|
|
xorl %r10d,%esi
|
|
vpslldq $8,%ymm6,%ymm6
|
|
rorxl $13,%r9d,%r14d
|
|
rorxl $2,%r9d,%r13d
|
|
leal (%rax,%r8,1),%eax
|
|
vpaddd %ymm6,%ymm2,%ymm2
|
|
andl %esi,%r15d
|
|
vaesenclast %xmm10,%xmm9,%xmm11
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 176-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %r10d,%r15d
|
|
vpaddd 64(%rbp),%ymm2,%ymm6
|
|
xorl %r13d,%r14d
|
|
leal (%r8,%r15,1),%r8d
|
|
movl %ebx,%r12d
|
|
vmovdqa %ymm6,0(%rsp)
|
|
vpalignr $4,%ymm3,%ymm0,%ymm4
|
|
addl 32+128(%rsp),%edx
|
|
andl %eax,%r12d
|
|
rorxl $25,%eax,%r13d
|
|
vpalignr $4,%ymm1,%ymm2,%ymm7
|
|
rorxl $11,%eax,%r15d
|
|
leal (%r8,%r14,1),%r8d
|
|
leal (%rdx,%r12,1),%edx
|
|
vpsrld $7,%ymm4,%ymm6
|
|
andnl %ecx,%eax,%r12d
|
|
xorl %r15d,%r13d
|
|
rorxl $6,%eax,%r14d
|
|
vpaddd %ymm7,%ymm3,%ymm3
|
|
leal (%rdx,%r12,1),%edx
|
|
xorl %r14d,%r13d
|
|
movl %r8d,%r15d
|
|
vpsrld $3,%ymm4,%ymm7
|
|
rorxl $22,%r8d,%r12d
|
|
leal (%rdx,%r13,1),%edx
|
|
xorl %r9d,%r15d
|
|
vpslld $14,%ymm4,%ymm5
|
|
rorxl $13,%r8d,%r14d
|
|
rorxl $2,%r8d,%r13d
|
|
leal (%r11,%rdx,1),%r11d
|
|
vpxor %ymm6,%ymm7,%ymm4
|
|
andl %r15d,%esi
|
|
vpand %xmm12,%xmm11,%xmm8
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 192-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %r9d,%esi
|
|
vpshufd $250,%ymm2,%ymm7
|
|
xorl %r13d,%r14d
|
|
leal (%rdx,%rsi,1),%edx
|
|
movl %eax,%r12d
|
|
vpsrld $11,%ymm6,%ymm6
|
|
addl 36+128(%rsp),%ecx
|
|
andl %r11d,%r12d
|
|
rorxl $25,%r11d,%r13d
|
|
vpxor %ymm5,%ymm4,%ymm4
|
|
rorxl $11,%r11d,%esi
|
|
leal (%rdx,%r14,1),%edx
|
|
leal (%rcx,%r12,1),%ecx
|
|
vpslld $11,%ymm5,%ymm5
|
|
andnl %ebx,%r11d,%r12d
|
|
xorl %esi,%r13d
|
|
rorxl $6,%r11d,%r14d
|
|
vpxor %ymm6,%ymm4,%ymm4
|
|
leal (%rcx,%r12,1),%ecx
|
|
xorl %r14d,%r13d
|
|
movl %edx,%esi
|
|
vpsrld $10,%ymm7,%ymm6
|
|
rorxl $22,%edx,%r12d
|
|
leal (%rcx,%r13,1),%ecx
|
|
xorl %r8d,%esi
|
|
vpxor %ymm5,%ymm4,%ymm4
|
|
rorxl $13,%edx,%r14d
|
|
rorxl $2,%edx,%r13d
|
|
leal (%r10,%rcx,1),%r10d
|
|
vpsrlq $17,%ymm7,%ymm7
|
|
andl %esi,%r15d
|
|
vaesenclast %xmm10,%xmm9,%xmm11
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 208-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %r8d,%r15d
|
|
vpaddd %ymm4,%ymm3,%ymm3
|
|
xorl %r13d,%r14d
|
|
leal (%rcx,%r15,1),%ecx
|
|
movl %r11d,%r12d
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
addl 40+128(%rsp),%ebx
|
|
andl %r10d,%r12d
|
|
rorxl $25,%r10d,%r13d
|
|
vpsrlq $2,%ymm7,%ymm7
|
|
rorxl $11,%r10d,%r15d
|
|
leal (%rcx,%r14,1),%ecx
|
|
leal (%rbx,%r12,1),%ebx
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
andnl %eax,%r10d,%r12d
|
|
xorl %r15d,%r13d
|
|
rorxl $6,%r10d,%r14d
|
|
vpshufd $132,%ymm6,%ymm6
|
|
leal (%rbx,%r12,1),%ebx
|
|
xorl %r14d,%r13d
|
|
movl %ecx,%r15d
|
|
vpsrldq $8,%ymm6,%ymm6
|
|
rorxl $22,%ecx,%r12d
|
|
leal (%rbx,%r13,1),%ebx
|
|
xorl %edx,%r15d
|
|
vpaddd %ymm6,%ymm3,%ymm3
|
|
rorxl $13,%ecx,%r14d
|
|
rorxl $2,%ecx,%r13d
|
|
leal (%r9,%rbx,1),%r9d
|
|
vpshufd $80,%ymm3,%ymm7
|
|
andl %r15d,%esi
|
|
vpand %xmm13,%xmm11,%xmm11
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 224-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %edx,%esi
|
|
vpsrld $10,%ymm7,%ymm6
|
|
xorl %r13d,%r14d
|
|
leal (%rbx,%rsi,1),%ebx
|
|
movl %r10d,%r12d
|
|
vpsrlq $17,%ymm7,%ymm7
|
|
addl 44+128(%rsp),%eax
|
|
andl %r9d,%r12d
|
|
rorxl $25,%r9d,%r13d
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
rorxl $11,%r9d,%esi
|
|
leal (%rbx,%r14,1),%ebx
|
|
leal (%rax,%r12,1),%eax
|
|
vpsrlq $2,%ymm7,%ymm7
|
|
andnl %r11d,%r9d,%r12d
|
|
xorl %esi,%r13d
|
|
rorxl $6,%r9d,%r14d
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
leal (%rax,%r12,1),%eax
|
|
xorl %r14d,%r13d
|
|
movl %ebx,%esi
|
|
vpshufd $232,%ymm6,%ymm6
|
|
rorxl $22,%ebx,%r12d
|
|
leal (%rax,%r13,1),%eax
|
|
xorl %ecx,%esi
|
|
vpslldq $8,%ymm6,%ymm6
|
|
rorxl $13,%ebx,%r14d
|
|
rorxl $2,%ebx,%r13d
|
|
leal (%r8,%rax,1),%r8d
|
|
vpaddd %ymm6,%ymm3,%ymm3
|
|
andl %esi,%r15d
|
|
vpor %xmm11,%xmm8,%xmm8
|
|
vaesenclast %xmm10,%xmm9,%xmm11
|
|
vmovdqu 0-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %ecx,%r15d
|
|
vpaddd 96(%rbp),%ymm3,%ymm6
|
|
xorl %r13d,%r14d
|
|
leal (%rax,%r15,1),%eax
|
|
movl %r9d,%r12d
|
|
vmovdqa %ymm6,32(%rsp)
|
|
vmovq %xmm15,%r13
|
|
vpextrq $1,%xmm15,%r15
|
|
vpand %xmm14,%xmm11,%xmm11
|
|
vpor %xmm11,%xmm8,%xmm8
|
|
vmovdqu %xmm8,(%r15,%r13,1)
|
|
leaq 16(%r13),%r13
|
|
leaq 128(%rbp),%rbp
|
|
cmpb $0,3(%rbp)
|
|
jne .Lavx2_00_47
|
|
vmovdqu (%r13),%xmm9
|
|
vpinsrq $0,%r13,%xmm15,%xmm15
|
|
addl 0+64(%rsp),%r11d
|
|
andl %r8d,%r12d
|
|
rorxl $25,%r8d,%r13d
|
|
rorxl $11,%r8d,%r15d
|
|
leal (%rax,%r14,1),%eax
|
|
leal (%r11,%r12,1),%r11d
|
|
andnl %r10d,%r8d,%r12d
|
|
xorl %r15d,%r13d
|
|
rorxl $6,%r8d,%r14d
|
|
leal (%r11,%r12,1),%r11d
|
|
xorl %r14d,%r13d
|
|
movl %eax,%r15d
|
|
rorxl $22,%eax,%r12d
|
|
leal (%r11,%r13,1),%r11d
|
|
xorl %ebx,%r15d
|
|
rorxl $13,%eax,%r14d
|
|
rorxl $2,%eax,%r13d
|
|
leal (%rdx,%r11,1),%edx
|
|
andl %r15d,%esi
|
|
vpxor %xmm10,%xmm9,%xmm9
|
|
vmovdqu 16-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %ebx,%esi
|
|
xorl %r13d,%r14d
|
|
leal (%r11,%rsi,1),%r11d
|
|
movl %r8d,%r12d
|
|
addl 4+64(%rsp),%r10d
|
|
andl %edx,%r12d
|
|
rorxl $25,%edx,%r13d
|
|
rorxl $11,%edx,%esi
|
|
leal (%r11,%r14,1),%r11d
|
|
leal (%r10,%r12,1),%r10d
|
|
andnl %r9d,%edx,%r12d
|
|
xorl %esi,%r13d
|
|
rorxl $6,%edx,%r14d
|
|
leal (%r10,%r12,1),%r10d
|
|
xorl %r14d,%r13d
|
|
movl %r11d,%esi
|
|
rorxl $22,%r11d,%r12d
|
|
leal (%r10,%r13,1),%r10d
|
|
xorl %eax,%esi
|
|
rorxl $13,%r11d,%r14d
|
|
rorxl $2,%r11d,%r13d
|
|
leal (%rcx,%r10,1),%ecx
|
|
andl %esi,%r15d
|
|
vpxor %xmm8,%xmm9,%xmm9
|
|
xorl %r12d,%r14d
|
|
xorl %eax,%r15d
|
|
xorl %r13d,%r14d
|
|
leal (%r10,%r15,1),%r10d
|
|
movl %edx,%r12d
|
|
addl 8+64(%rsp),%r9d
|
|
andl %ecx,%r12d
|
|
rorxl $25,%ecx,%r13d
|
|
rorxl $11,%ecx,%r15d
|
|
leal (%r10,%r14,1),%r10d
|
|
leal (%r9,%r12,1),%r9d
|
|
andnl %r8d,%ecx,%r12d
|
|
xorl %r15d,%r13d
|
|
rorxl $6,%ecx,%r14d
|
|
leal (%r9,%r12,1),%r9d
|
|
xorl %r14d,%r13d
|
|
movl %r10d,%r15d
|
|
rorxl $22,%r10d,%r12d
|
|
leal (%r9,%r13,1),%r9d
|
|
xorl %r11d,%r15d
|
|
rorxl $13,%r10d,%r14d
|
|
rorxl $2,%r10d,%r13d
|
|
leal (%rbx,%r9,1),%ebx
|
|
andl %r15d,%esi
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 32-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %r11d,%esi
|
|
xorl %r13d,%r14d
|
|
leal (%r9,%rsi,1),%r9d
|
|
movl %ecx,%r12d
|
|
addl 12+64(%rsp),%r8d
|
|
andl %ebx,%r12d
|
|
rorxl $25,%ebx,%r13d
|
|
rorxl $11,%ebx,%esi
|
|
leal (%r9,%r14,1),%r9d
|
|
leal (%r8,%r12,1),%r8d
|
|
andnl %edx,%ebx,%r12d
|
|
xorl %esi,%r13d
|
|
rorxl $6,%ebx,%r14d
|
|
leal (%r8,%r12,1),%r8d
|
|
xorl %r14d,%r13d
|
|
movl %r9d,%esi
|
|
rorxl $22,%r9d,%r12d
|
|
leal (%r8,%r13,1),%r8d
|
|
xorl %r10d,%esi
|
|
rorxl $13,%r9d,%r14d
|
|
rorxl $2,%r9d,%r13d
|
|
leal (%rax,%r8,1),%eax
|
|
andl %esi,%r15d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 48-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %r10d,%r15d
|
|
xorl %r13d,%r14d
|
|
leal (%r8,%r15,1),%r8d
|
|
movl %ebx,%r12d
|
|
addl 32+64(%rsp),%edx
|
|
andl %eax,%r12d
|
|
rorxl $25,%eax,%r13d
|
|
rorxl $11,%eax,%r15d
|
|
leal (%r8,%r14,1),%r8d
|
|
leal (%rdx,%r12,1),%edx
|
|
andnl %ecx,%eax,%r12d
|
|
xorl %r15d,%r13d
|
|
rorxl $6,%eax,%r14d
|
|
leal (%rdx,%r12,1),%edx
|
|
xorl %r14d,%r13d
|
|
movl %r8d,%r15d
|
|
rorxl $22,%r8d,%r12d
|
|
leal (%rdx,%r13,1),%edx
|
|
xorl %r9d,%r15d
|
|
rorxl $13,%r8d,%r14d
|
|
rorxl $2,%r8d,%r13d
|
|
leal (%r11,%rdx,1),%r11d
|
|
andl %r15d,%esi
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 64-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %r9d,%esi
|
|
xorl %r13d,%r14d
|
|
leal (%rdx,%rsi,1),%edx
|
|
movl %eax,%r12d
|
|
addl 36+64(%rsp),%ecx
|
|
andl %r11d,%r12d
|
|
rorxl $25,%r11d,%r13d
|
|
rorxl $11,%r11d,%esi
|
|
leal (%rdx,%r14,1),%edx
|
|
leal (%rcx,%r12,1),%ecx
|
|
andnl %ebx,%r11d,%r12d
|
|
xorl %esi,%r13d
|
|
rorxl $6,%r11d,%r14d
|
|
leal (%rcx,%r12,1),%ecx
|
|
xorl %r14d,%r13d
|
|
movl %edx,%esi
|
|
rorxl $22,%edx,%r12d
|
|
leal (%rcx,%r13,1),%ecx
|
|
xorl %r8d,%esi
|
|
rorxl $13,%edx,%r14d
|
|
rorxl $2,%edx,%r13d
|
|
leal (%r10,%rcx,1),%r10d
|
|
andl %esi,%r15d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 80-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %r8d,%r15d
|
|
xorl %r13d,%r14d
|
|
leal (%rcx,%r15,1),%ecx
|
|
movl %r11d,%r12d
|
|
addl 40+64(%rsp),%ebx
|
|
andl %r10d,%r12d
|
|
rorxl $25,%r10d,%r13d
|
|
rorxl $11,%r10d,%r15d
|
|
leal (%rcx,%r14,1),%ecx
|
|
leal (%rbx,%r12,1),%ebx
|
|
andnl %eax,%r10d,%r12d
|
|
xorl %r15d,%r13d
|
|
rorxl $6,%r10d,%r14d
|
|
leal (%rbx,%r12,1),%ebx
|
|
xorl %r14d,%r13d
|
|
movl %ecx,%r15d
|
|
rorxl $22,%ecx,%r12d
|
|
leal (%rbx,%r13,1),%ebx
|
|
xorl %edx,%r15d
|
|
rorxl $13,%ecx,%r14d
|
|
rorxl $2,%ecx,%r13d
|
|
leal (%r9,%rbx,1),%r9d
|
|
andl %r15d,%esi
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 96-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %edx,%esi
|
|
xorl %r13d,%r14d
|
|
leal (%rbx,%rsi,1),%ebx
|
|
movl %r10d,%r12d
|
|
addl 44+64(%rsp),%eax
|
|
andl %r9d,%r12d
|
|
rorxl $25,%r9d,%r13d
|
|
rorxl $11,%r9d,%esi
|
|
leal (%rbx,%r14,1),%ebx
|
|
leal (%rax,%r12,1),%eax
|
|
andnl %r11d,%r9d,%r12d
|
|
xorl %esi,%r13d
|
|
rorxl $6,%r9d,%r14d
|
|
leal (%rax,%r12,1),%eax
|
|
xorl %r14d,%r13d
|
|
movl %ebx,%esi
|
|
rorxl $22,%ebx,%r12d
|
|
leal (%rax,%r13,1),%eax
|
|
xorl %ecx,%esi
|
|
rorxl $13,%ebx,%r14d
|
|
rorxl $2,%ebx,%r13d
|
|
leal (%r8,%rax,1),%r8d
|
|
andl %esi,%r15d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 112-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %ecx,%r15d
|
|
xorl %r13d,%r14d
|
|
leal (%rax,%r15,1),%eax
|
|
movl %r9d,%r12d
|
|
addl 0(%rsp),%r11d
|
|
andl %r8d,%r12d
|
|
rorxl $25,%r8d,%r13d
|
|
rorxl $11,%r8d,%r15d
|
|
leal (%rax,%r14,1),%eax
|
|
leal (%r11,%r12,1),%r11d
|
|
andnl %r10d,%r8d,%r12d
|
|
xorl %r15d,%r13d
|
|
rorxl $6,%r8d,%r14d
|
|
leal (%r11,%r12,1),%r11d
|
|
xorl %r14d,%r13d
|
|
movl %eax,%r15d
|
|
rorxl $22,%eax,%r12d
|
|
leal (%r11,%r13,1),%r11d
|
|
xorl %ebx,%r15d
|
|
rorxl $13,%eax,%r14d
|
|
rorxl $2,%eax,%r13d
|
|
leal (%rdx,%r11,1),%edx
|
|
andl %r15d,%esi
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 128-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %ebx,%esi
|
|
xorl %r13d,%r14d
|
|
leal (%r11,%rsi,1),%r11d
|
|
movl %r8d,%r12d
|
|
addl 4(%rsp),%r10d
|
|
andl %edx,%r12d
|
|
rorxl $25,%edx,%r13d
|
|
rorxl $11,%edx,%esi
|
|
leal (%r11,%r14,1),%r11d
|
|
leal (%r10,%r12,1),%r10d
|
|
andnl %r9d,%edx,%r12d
|
|
xorl %esi,%r13d
|
|
rorxl $6,%edx,%r14d
|
|
leal (%r10,%r12,1),%r10d
|
|
xorl %r14d,%r13d
|
|
movl %r11d,%esi
|
|
rorxl $22,%r11d,%r12d
|
|
leal (%r10,%r13,1),%r10d
|
|
xorl %eax,%esi
|
|
rorxl $13,%r11d,%r14d
|
|
rorxl $2,%r11d,%r13d
|
|
leal (%rcx,%r10,1),%ecx
|
|
andl %esi,%r15d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 144-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %eax,%r15d
|
|
xorl %r13d,%r14d
|
|
leal (%r10,%r15,1),%r10d
|
|
movl %edx,%r12d
|
|
addl 8(%rsp),%r9d
|
|
andl %ecx,%r12d
|
|
rorxl $25,%ecx,%r13d
|
|
rorxl $11,%ecx,%r15d
|
|
leal (%r10,%r14,1),%r10d
|
|
leal (%r9,%r12,1),%r9d
|
|
andnl %r8d,%ecx,%r12d
|
|
xorl %r15d,%r13d
|
|
rorxl $6,%ecx,%r14d
|
|
leal (%r9,%r12,1),%r9d
|
|
xorl %r14d,%r13d
|
|
movl %r10d,%r15d
|
|
rorxl $22,%r10d,%r12d
|
|
leal (%r9,%r13,1),%r9d
|
|
xorl %r11d,%r15d
|
|
rorxl $13,%r10d,%r14d
|
|
rorxl $2,%r10d,%r13d
|
|
leal (%rbx,%r9,1),%ebx
|
|
andl %r15d,%esi
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 160-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %r11d,%esi
|
|
xorl %r13d,%r14d
|
|
leal (%r9,%rsi,1),%r9d
|
|
movl %ecx,%r12d
|
|
addl 12(%rsp),%r8d
|
|
andl %ebx,%r12d
|
|
rorxl $25,%ebx,%r13d
|
|
rorxl $11,%ebx,%esi
|
|
leal (%r9,%r14,1),%r9d
|
|
leal (%r8,%r12,1),%r8d
|
|
andnl %edx,%ebx,%r12d
|
|
xorl %esi,%r13d
|
|
rorxl $6,%ebx,%r14d
|
|
leal (%r8,%r12,1),%r8d
|
|
xorl %r14d,%r13d
|
|
movl %r9d,%esi
|
|
rorxl $22,%r9d,%r12d
|
|
leal (%r8,%r13,1),%r8d
|
|
xorl %r10d,%esi
|
|
rorxl $13,%r9d,%r14d
|
|
rorxl $2,%r9d,%r13d
|
|
leal (%rax,%r8,1),%eax
|
|
andl %esi,%r15d
|
|
vaesenclast %xmm10,%xmm9,%xmm11
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 176-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %r10d,%r15d
|
|
xorl %r13d,%r14d
|
|
leal (%r8,%r15,1),%r8d
|
|
movl %ebx,%r12d
|
|
addl 32(%rsp),%edx
|
|
andl %eax,%r12d
|
|
rorxl $25,%eax,%r13d
|
|
rorxl $11,%eax,%r15d
|
|
leal (%r8,%r14,1),%r8d
|
|
leal (%rdx,%r12,1),%edx
|
|
andnl %ecx,%eax,%r12d
|
|
xorl %r15d,%r13d
|
|
rorxl $6,%eax,%r14d
|
|
leal (%rdx,%r12,1),%edx
|
|
xorl %r14d,%r13d
|
|
movl %r8d,%r15d
|
|
rorxl $22,%r8d,%r12d
|
|
leal (%rdx,%r13,1),%edx
|
|
xorl %r9d,%r15d
|
|
rorxl $13,%r8d,%r14d
|
|
rorxl $2,%r8d,%r13d
|
|
leal (%r11,%rdx,1),%r11d
|
|
andl %r15d,%esi
|
|
vpand %xmm12,%xmm11,%xmm8
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 192-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %r9d,%esi
|
|
xorl %r13d,%r14d
|
|
leal (%rdx,%rsi,1),%edx
|
|
movl %eax,%r12d
|
|
addl 36(%rsp),%ecx
|
|
andl %r11d,%r12d
|
|
rorxl $25,%r11d,%r13d
|
|
rorxl $11,%r11d,%esi
|
|
leal (%rdx,%r14,1),%edx
|
|
leal (%rcx,%r12,1),%ecx
|
|
andnl %ebx,%r11d,%r12d
|
|
xorl %esi,%r13d
|
|
rorxl $6,%r11d,%r14d
|
|
leal (%rcx,%r12,1),%ecx
|
|
xorl %r14d,%r13d
|
|
movl %edx,%esi
|
|
rorxl $22,%edx,%r12d
|
|
leal (%rcx,%r13,1),%ecx
|
|
xorl %r8d,%esi
|
|
rorxl $13,%edx,%r14d
|
|
rorxl $2,%edx,%r13d
|
|
leal (%r10,%rcx,1),%r10d
|
|
andl %esi,%r15d
|
|
vaesenclast %xmm10,%xmm9,%xmm11
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 208-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %r8d,%r15d
|
|
xorl %r13d,%r14d
|
|
leal (%rcx,%r15,1),%ecx
|
|
movl %r11d,%r12d
|
|
addl 40(%rsp),%ebx
|
|
andl %r10d,%r12d
|
|
rorxl $25,%r10d,%r13d
|
|
rorxl $11,%r10d,%r15d
|
|
leal (%rcx,%r14,1),%ecx
|
|
leal (%rbx,%r12,1),%ebx
|
|
andnl %eax,%r10d,%r12d
|
|
xorl %r15d,%r13d
|
|
rorxl $6,%r10d,%r14d
|
|
leal (%rbx,%r12,1),%ebx
|
|
xorl %r14d,%r13d
|
|
movl %ecx,%r15d
|
|
rorxl $22,%ecx,%r12d
|
|
leal (%rbx,%r13,1),%ebx
|
|
xorl %edx,%r15d
|
|
rorxl $13,%ecx,%r14d
|
|
rorxl $2,%ecx,%r13d
|
|
leal (%r9,%rbx,1),%r9d
|
|
andl %r15d,%esi
|
|
vpand %xmm13,%xmm11,%xmm11
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 224-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %edx,%esi
|
|
xorl %r13d,%r14d
|
|
leal (%rbx,%rsi,1),%ebx
|
|
movl %r10d,%r12d
|
|
addl 44(%rsp),%eax
|
|
andl %r9d,%r12d
|
|
rorxl $25,%r9d,%r13d
|
|
rorxl $11,%r9d,%esi
|
|
leal (%rbx,%r14,1),%ebx
|
|
leal (%rax,%r12,1),%eax
|
|
andnl %r11d,%r9d,%r12d
|
|
xorl %esi,%r13d
|
|
rorxl $6,%r9d,%r14d
|
|
leal (%rax,%r12,1),%eax
|
|
xorl %r14d,%r13d
|
|
movl %ebx,%esi
|
|
rorxl $22,%ebx,%r12d
|
|
leal (%rax,%r13,1),%eax
|
|
xorl %ecx,%esi
|
|
rorxl $13,%ebx,%r14d
|
|
rorxl $2,%ebx,%r13d
|
|
leal (%r8,%rax,1),%r8d
|
|
andl %esi,%r15d
|
|
vpor %xmm11,%xmm8,%xmm8
|
|
vaesenclast %xmm10,%xmm9,%xmm11
|
|
vmovdqu 0-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %ecx,%r15d
|
|
xorl %r13d,%r14d
|
|
leal (%rax,%r15,1),%eax
|
|
movl %r9d,%r12d
|
|
vpextrq $1,%xmm15,%r12
|
|
vmovq %xmm15,%r13
|
|
movq 552(%rsp),%r15
|
|
addl %r14d,%eax
|
|
leaq 448(%rsp),%rbp
|
|
|
|
vpand %xmm14,%xmm11,%xmm11
|
|
vpor %xmm11,%xmm8,%xmm8
|
|
vmovdqu %xmm8,(%r12,%r13,1)
|
|
leaq 16(%r13),%r13
|
|
|
|
addl 0(%r15),%eax
|
|
addl 4(%r15),%ebx
|
|
addl 8(%r15),%ecx
|
|
addl 12(%r15),%edx
|
|
addl 16(%r15),%r8d
|
|
addl 20(%r15),%r9d
|
|
addl 24(%r15),%r10d
|
|
addl 28(%r15),%r11d
|
|
|
|
movl %eax,0(%r15)
|
|
movl %ebx,4(%r15)
|
|
movl %ecx,8(%r15)
|
|
movl %edx,12(%r15)
|
|
movl %r8d,16(%r15)
|
|
movl %r9d,20(%r15)
|
|
movl %r10d,24(%r15)
|
|
movl %r11d,28(%r15)
|
|
|
|
cmpq 80(%rbp),%r13
|
|
je .Ldone_avx2
|
|
|
|
xorl %r14d,%r14d
|
|
movl %ebx,%esi
|
|
movl %r9d,%r12d
|
|
xorl %ecx,%esi
|
|
jmp .Lower_avx2
|
|
.align 16
|
|
.Lower_avx2:
|
|
vmovdqu (%r13),%xmm9
|
|
vpinsrq $0,%r13,%xmm15,%xmm15
|
|
addl 0+16(%rbp),%r11d
|
|
andl %r8d,%r12d
|
|
rorxl $25,%r8d,%r13d
|
|
rorxl $11,%r8d,%r15d
|
|
leal (%rax,%r14,1),%eax
|
|
leal (%r11,%r12,1),%r11d
|
|
andnl %r10d,%r8d,%r12d
|
|
xorl %r15d,%r13d
|
|
rorxl $6,%r8d,%r14d
|
|
leal (%r11,%r12,1),%r11d
|
|
xorl %r14d,%r13d
|
|
movl %eax,%r15d
|
|
rorxl $22,%eax,%r12d
|
|
leal (%r11,%r13,1),%r11d
|
|
xorl %ebx,%r15d
|
|
rorxl $13,%eax,%r14d
|
|
rorxl $2,%eax,%r13d
|
|
leal (%rdx,%r11,1),%edx
|
|
andl %r15d,%esi
|
|
vpxor %xmm10,%xmm9,%xmm9
|
|
vmovdqu 16-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %ebx,%esi
|
|
xorl %r13d,%r14d
|
|
leal (%r11,%rsi,1),%r11d
|
|
movl %r8d,%r12d
|
|
addl 4+16(%rbp),%r10d
|
|
andl %edx,%r12d
|
|
rorxl $25,%edx,%r13d
|
|
rorxl $11,%edx,%esi
|
|
leal (%r11,%r14,1),%r11d
|
|
leal (%r10,%r12,1),%r10d
|
|
andnl %r9d,%edx,%r12d
|
|
xorl %esi,%r13d
|
|
rorxl $6,%edx,%r14d
|
|
leal (%r10,%r12,1),%r10d
|
|
xorl %r14d,%r13d
|
|
movl %r11d,%esi
|
|
rorxl $22,%r11d,%r12d
|
|
leal (%r10,%r13,1),%r10d
|
|
xorl %eax,%esi
|
|
rorxl $13,%r11d,%r14d
|
|
rorxl $2,%r11d,%r13d
|
|
leal (%rcx,%r10,1),%ecx
|
|
andl %esi,%r15d
|
|
vpxor %xmm8,%xmm9,%xmm9
|
|
xorl %r12d,%r14d
|
|
xorl %eax,%r15d
|
|
xorl %r13d,%r14d
|
|
leal (%r10,%r15,1),%r10d
|
|
movl %edx,%r12d
|
|
addl 8+16(%rbp),%r9d
|
|
andl %ecx,%r12d
|
|
rorxl $25,%ecx,%r13d
|
|
rorxl $11,%ecx,%r15d
|
|
leal (%r10,%r14,1),%r10d
|
|
leal (%r9,%r12,1),%r9d
|
|
andnl %r8d,%ecx,%r12d
|
|
xorl %r15d,%r13d
|
|
rorxl $6,%ecx,%r14d
|
|
leal (%r9,%r12,1),%r9d
|
|
xorl %r14d,%r13d
|
|
movl %r10d,%r15d
|
|
rorxl $22,%r10d,%r12d
|
|
leal (%r9,%r13,1),%r9d
|
|
xorl %r11d,%r15d
|
|
rorxl $13,%r10d,%r14d
|
|
rorxl $2,%r10d,%r13d
|
|
leal (%rbx,%r9,1),%ebx
|
|
andl %r15d,%esi
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 32-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %r11d,%esi
|
|
xorl %r13d,%r14d
|
|
leal (%r9,%rsi,1),%r9d
|
|
movl %ecx,%r12d
|
|
addl 12+16(%rbp),%r8d
|
|
andl %ebx,%r12d
|
|
rorxl $25,%ebx,%r13d
|
|
rorxl $11,%ebx,%esi
|
|
leal (%r9,%r14,1),%r9d
|
|
leal (%r8,%r12,1),%r8d
|
|
andnl %edx,%ebx,%r12d
|
|
xorl %esi,%r13d
|
|
rorxl $6,%ebx,%r14d
|
|
leal (%r8,%r12,1),%r8d
|
|
xorl %r14d,%r13d
|
|
movl %r9d,%esi
|
|
rorxl $22,%r9d,%r12d
|
|
leal (%r8,%r13,1),%r8d
|
|
xorl %r10d,%esi
|
|
rorxl $13,%r9d,%r14d
|
|
rorxl $2,%r9d,%r13d
|
|
leal (%rax,%r8,1),%eax
|
|
andl %esi,%r15d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 48-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %r10d,%r15d
|
|
xorl %r13d,%r14d
|
|
leal (%r8,%r15,1),%r8d
|
|
movl %ebx,%r12d
|
|
addl 32+16(%rbp),%edx
|
|
andl %eax,%r12d
|
|
rorxl $25,%eax,%r13d
|
|
rorxl $11,%eax,%r15d
|
|
leal (%r8,%r14,1),%r8d
|
|
leal (%rdx,%r12,1),%edx
|
|
andnl %ecx,%eax,%r12d
|
|
xorl %r15d,%r13d
|
|
rorxl $6,%eax,%r14d
|
|
leal (%rdx,%r12,1),%edx
|
|
xorl %r14d,%r13d
|
|
movl %r8d,%r15d
|
|
rorxl $22,%r8d,%r12d
|
|
leal (%rdx,%r13,1),%edx
|
|
xorl %r9d,%r15d
|
|
rorxl $13,%r8d,%r14d
|
|
rorxl $2,%r8d,%r13d
|
|
leal (%r11,%rdx,1),%r11d
|
|
andl %r15d,%esi
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 64-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %r9d,%esi
|
|
xorl %r13d,%r14d
|
|
leal (%rdx,%rsi,1),%edx
|
|
movl %eax,%r12d
|
|
addl 36+16(%rbp),%ecx
|
|
andl %r11d,%r12d
|
|
rorxl $25,%r11d,%r13d
|
|
rorxl $11,%r11d,%esi
|
|
leal (%rdx,%r14,1),%edx
|
|
leal (%rcx,%r12,1),%ecx
|
|
andnl %ebx,%r11d,%r12d
|
|
xorl %esi,%r13d
|
|
rorxl $6,%r11d,%r14d
|
|
leal (%rcx,%r12,1),%ecx
|
|
xorl %r14d,%r13d
|
|
movl %edx,%esi
|
|
rorxl $22,%edx,%r12d
|
|
leal (%rcx,%r13,1),%ecx
|
|
xorl %r8d,%esi
|
|
rorxl $13,%edx,%r14d
|
|
rorxl $2,%edx,%r13d
|
|
leal (%r10,%rcx,1),%r10d
|
|
andl %esi,%r15d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 80-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %r8d,%r15d
|
|
xorl %r13d,%r14d
|
|
leal (%rcx,%r15,1),%ecx
|
|
movl %r11d,%r12d
|
|
addl 40+16(%rbp),%ebx
|
|
andl %r10d,%r12d
|
|
rorxl $25,%r10d,%r13d
|
|
rorxl $11,%r10d,%r15d
|
|
leal (%rcx,%r14,1),%ecx
|
|
leal (%rbx,%r12,1),%ebx
|
|
andnl %eax,%r10d,%r12d
|
|
xorl %r15d,%r13d
|
|
rorxl $6,%r10d,%r14d
|
|
leal (%rbx,%r12,1),%ebx
|
|
xorl %r14d,%r13d
|
|
movl %ecx,%r15d
|
|
rorxl $22,%ecx,%r12d
|
|
leal (%rbx,%r13,1),%ebx
|
|
xorl %edx,%r15d
|
|
rorxl $13,%ecx,%r14d
|
|
rorxl $2,%ecx,%r13d
|
|
leal (%r9,%rbx,1),%r9d
|
|
andl %r15d,%esi
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 96-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %edx,%esi
|
|
xorl %r13d,%r14d
|
|
leal (%rbx,%rsi,1),%ebx
|
|
movl %r10d,%r12d
|
|
addl 44+16(%rbp),%eax
|
|
andl %r9d,%r12d
|
|
rorxl $25,%r9d,%r13d
|
|
rorxl $11,%r9d,%esi
|
|
leal (%rbx,%r14,1),%ebx
|
|
leal (%rax,%r12,1),%eax
|
|
andnl %r11d,%r9d,%r12d
|
|
xorl %esi,%r13d
|
|
rorxl $6,%r9d,%r14d
|
|
leal (%rax,%r12,1),%eax
|
|
xorl %r14d,%r13d
|
|
movl %ebx,%esi
|
|
rorxl $22,%ebx,%r12d
|
|
leal (%rax,%r13,1),%eax
|
|
xorl %ecx,%esi
|
|
rorxl $13,%ebx,%r14d
|
|
rorxl $2,%ebx,%r13d
|
|
leal (%r8,%rax,1),%r8d
|
|
andl %esi,%r15d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 112-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %ecx,%r15d
|
|
xorl %r13d,%r14d
|
|
leal (%rax,%r15,1),%eax
|
|
movl %r9d,%r12d
|
|
leaq -64(%rbp),%rbp
|
|
addl 0+16(%rbp),%r11d
|
|
andl %r8d,%r12d
|
|
rorxl $25,%r8d,%r13d
|
|
rorxl $11,%r8d,%r15d
|
|
leal (%rax,%r14,1),%eax
|
|
leal (%r11,%r12,1),%r11d
|
|
andnl %r10d,%r8d,%r12d
|
|
xorl %r15d,%r13d
|
|
rorxl $6,%r8d,%r14d
|
|
leal (%r11,%r12,1),%r11d
|
|
xorl %r14d,%r13d
|
|
movl %eax,%r15d
|
|
rorxl $22,%eax,%r12d
|
|
leal (%r11,%r13,1),%r11d
|
|
xorl %ebx,%r15d
|
|
rorxl $13,%eax,%r14d
|
|
rorxl $2,%eax,%r13d
|
|
leal (%rdx,%r11,1),%edx
|
|
andl %r15d,%esi
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 128-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %ebx,%esi
|
|
xorl %r13d,%r14d
|
|
leal (%r11,%rsi,1),%r11d
|
|
movl %r8d,%r12d
|
|
addl 4+16(%rbp),%r10d
|
|
andl %edx,%r12d
|
|
rorxl $25,%edx,%r13d
|
|
rorxl $11,%edx,%esi
|
|
leal (%r11,%r14,1),%r11d
|
|
leal (%r10,%r12,1),%r10d
|
|
andnl %r9d,%edx,%r12d
|
|
xorl %esi,%r13d
|
|
rorxl $6,%edx,%r14d
|
|
leal (%r10,%r12,1),%r10d
|
|
xorl %r14d,%r13d
|
|
movl %r11d,%esi
|
|
rorxl $22,%r11d,%r12d
|
|
leal (%r10,%r13,1),%r10d
|
|
xorl %eax,%esi
|
|
rorxl $13,%r11d,%r14d
|
|
rorxl $2,%r11d,%r13d
|
|
leal (%rcx,%r10,1),%ecx
|
|
andl %esi,%r15d
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 144-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %eax,%r15d
|
|
xorl %r13d,%r14d
|
|
leal (%r10,%r15,1),%r10d
|
|
movl %edx,%r12d
|
|
addl 8+16(%rbp),%r9d
|
|
andl %ecx,%r12d
|
|
rorxl $25,%ecx,%r13d
|
|
rorxl $11,%ecx,%r15d
|
|
leal (%r10,%r14,1),%r10d
|
|
leal (%r9,%r12,1),%r9d
|
|
andnl %r8d,%ecx,%r12d
|
|
xorl %r15d,%r13d
|
|
rorxl $6,%ecx,%r14d
|
|
leal (%r9,%r12,1),%r9d
|
|
xorl %r14d,%r13d
|
|
movl %r10d,%r15d
|
|
rorxl $22,%r10d,%r12d
|
|
leal (%r9,%r13,1),%r9d
|
|
xorl %r11d,%r15d
|
|
rorxl $13,%r10d,%r14d
|
|
rorxl $2,%r10d,%r13d
|
|
leal (%rbx,%r9,1),%ebx
|
|
andl %r15d,%esi
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 160-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %r11d,%esi
|
|
xorl %r13d,%r14d
|
|
leal (%r9,%rsi,1),%r9d
|
|
movl %ecx,%r12d
|
|
addl 12+16(%rbp),%r8d
|
|
andl %ebx,%r12d
|
|
rorxl $25,%ebx,%r13d
|
|
rorxl $11,%ebx,%esi
|
|
leal (%r9,%r14,1),%r9d
|
|
leal (%r8,%r12,1),%r8d
|
|
andnl %edx,%ebx,%r12d
|
|
xorl %esi,%r13d
|
|
rorxl $6,%ebx,%r14d
|
|
leal (%r8,%r12,1),%r8d
|
|
xorl %r14d,%r13d
|
|
movl %r9d,%esi
|
|
rorxl $22,%r9d,%r12d
|
|
leal (%r8,%r13,1),%r8d
|
|
xorl %r10d,%esi
|
|
rorxl $13,%r9d,%r14d
|
|
rorxl $2,%r9d,%r13d
|
|
leal (%rax,%r8,1),%eax
|
|
andl %esi,%r15d
|
|
vaesenclast %xmm10,%xmm9,%xmm11
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 176-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %r10d,%r15d
|
|
xorl %r13d,%r14d
|
|
leal (%r8,%r15,1),%r8d
|
|
movl %ebx,%r12d
|
|
addl 32+16(%rbp),%edx
|
|
andl %eax,%r12d
|
|
rorxl $25,%eax,%r13d
|
|
rorxl $11,%eax,%r15d
|
|
leal (%r8,%r14,1),%r8d
|
|
leal (%rdx,%r12,1),%edx
|
|
andnl %ecx,%eax,%r12d
|
|
xorl %r15d,%r13d
|
|
rorxl $6,%eax,%r14d
|
|
leal (%rdx,%r12,1),%edx
|
|
xorl %r14d,%r13d
|
|
movl %r8d,%r15d
|
|
rorxl $22,%r8d,%r12d
|
|
leal (%rdx,%r13,1),%edx
|
|
xorl %r9d,%r15d
|
|
rorxl $13,%r8d,%r14d
|
|
rorxl $2,%r8d,%r13d
|
|
leal (%r11,%rdx,1),%r11d
|
|
andl %r15d,%esi
|
|
vpand %xmm12,%xmm11,%xmm8
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 192-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %r9d,%esi
|
|
xorl %r13d,%r14d
|
|
leal (%rdx,%rsi,1),%edx
|
|
movl %eax,%r12d
|
|
addl 36+16(%rbp),%ecx
|
|
andl %r11d,%r12d
|
|
rorxl $25,%r11d,%r13d
|
|
rorxl $11,%r11d,%esi
|
|
leal (%rdx,%r14,1),%edx
|
|
leal (%rcx,%r12,1),%ecx
|
|
andnl %ebx,%r11d,%r12d
|
|
xorl %esi,%r13d
|
|
rorxl $6,%r11d,%r14d
|
|
leal (%rcx,%r12,1),%ecx
|
|
xorl %r14d,%r13d
|
|
movl %edx,%esi
|
|
rorxl $22,%edx,%r12d
|
|
leal (%rcx,%r13,1),%ecx
|
|
xorl %r8d,%esi
|
|
rorxl $13,%edx,%r14d
|
|
rorxl $2,%edx,%r13d
|
|
leal (%r10,%rcx,1),%r10d
|
|
andl %esi,%r15d
|
|
vaesenclast %xmm10,%xmm9,%xmm11
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 208-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %r8d,%r15d
|
|
xorl %r13d,%r14d
|
|
leal (%rcx,%r15,1),%ecx
|
|
movl %r11d,%r12d
|
|
addl 40+16(%rbp),%ebx
|
|
andl %r10d,%r12d
|
|
rorxl $25,%r10d,%r13d
|
|
rorxl $11,%r10d,%r15d
|
|
leal (%rcx,%r14,1),%ecx
|
|
leal (%rbx,%r12,1),%ebx
|
|
andnl %eax,%r10d,%r12d
|
|
xorl %r15d,%r13d
|
|
rorxl $6,%r10d,%r14d
|
|
leal (%rbx,%r12,1),%ebx
|
|
xorl %r14d,%r13d
|
|
movl %ecx,%r15d
|
|
rorxl $22,%ecx,%r12d
|
|
leal (%rbx,%r13,1),%ebx
|
|
xorl %edx,%r15d
|
|
rorxl $13,%ecx,%r14d
|
|
rorxl $2,%ecx,%r13d
|
|
leal (%r9,%rbx,1),%r9d
|
|
andl %r15d,%esi
|
|
vpand %xmm13,%xmm11,%xmm11
|
|
vaesenc %xmm10,%xmm9,%xmm9
|
|
vmovdqu 224-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %edx,%esi
|
|
xorl %r13d,%r14d
|
|
leal (%rbx,%rsi,1),%ebx
|
|
movl %r10d,%r12d
|
|
addl 44+16(%rbp),%eax
|
|
andl %r9d,%r12d
|
|
rorxl $25,%r9d,%r13d
|
|
rorxl $11,%r9d,%esi
|
|
leal (%rbx,%r14,1),%ebx
|
|
leal (%rax,%r12,1),%eax
|
|
andnl %r11d,%r9d,%r12d
|
|
xorl %esi,%r13d
|
|
rorxl $6,%r9d,%r14d
|
|
leal (%rax,%r12,1),%eax
|
|
xorl %r14d,%r13d
|
|
movl %ebx,%esi
|
|
rorxl $22,%ebx,%r12d
|
|
leal (%rax,%r13,1),%eax
|
|
xorl %ecx,%esi
|
|
rorxl $13,%ebx,%r14d
|
|
rorxl $2,%ebx,%r13d
|
|
leal (%r8,%rax,1),%r8d
|
|
andl %esi,%r15d
|
|
vpor %xmm11,%xmm8,%xmm8
|
|
vaesenclast %xmm10,%xmm9,%xmm11
|
|
vmovdqu 0-128(%rdi),%xmm10
|
|
xorl %r12d,%r14d
|
|
xorl %ecx,%r15d
|
|
xorl %r13d,%r14d
|
|
leal (%rax,%r15,1),%eax
|
|
movl %r9d,%r12d
|
|
vmovq %xmm15,%r13
|
|
vpextrq $1,%xmm15,%r15
|
|
vpand %xmm14,%xmm11,%xmm11
|
|
vpor %xmm11,%xmm8,%xmm8
|
|
leaq -64(%rbp),%rbp
|
|
vmovdqu %xmm8,(%r15,%r13,1)
|
|
leaq 16(%r13),%r13
|
|
cmpq %rsp,%rbp
|
|
jae .Lower_avx2
|
|
|
|
movq 552(%rsp),%r15
|
|
leaq 64(%r13),%r13
|
|
movq 560(%rsp),%rsi
|
|
addl %r14d,%eax
|
|
leaq 448(%rsp),%rsp
|
|
|
|
addl 0(%r15),%eax
|
|
addl 4(%r15),%ebx
|
|
addl 8(%r15),%ecx
|
|
addl 12(%r15),%edx
|
|
addl 16(%r15),%r8d
|
|
addl 20(%r15),%r9d
|
|
addl 24(%r15),%r10d
|
|
leaq (%rsi,%r13,1),%r12
|
|
addl 28(%r15),%r11d
|
|
|
|
cmpq 64+16(%rsp),%r13
|
|
|
|
movl %eax,0(%r15)
|
|
cmoveq %rsp,%r12
|
|
movl %ebx,4(%r15)
|
|
movl %ecx,8(%r15)
|
|
movl %edx,12(%r15)
|
|
movl %r8d,16(%r15)
|
|
movl %r9d,20(%r15)
|
|
movl %r10d,24(%r15)
|
|
movl %r11d,28(%r15)
|
|
|
|
jbe .Loop_avx2
|
|
leaq (%rsp),%rbp
|
|
|
|
.Ldone_avx2:
|
|
leaq (%rbp),%rsp
|
|
movq 64+32(%rsp),%r8
|
|
movq 120(%rsp),%rsi
|
|
.cfi_def_cfa %rsi,8
|
|
vmovdqu %xmm8,(%r8)
|
|
vzeroall
|
|
movq -48(%rsi),%r15
|
|
.cfi_restore %r15
|
|
movq -40(%rsi),%r14
|
|
.cfi_restore %r14
|
|
movq -32(%rsi),%r13
|
|
.cfi_restore %r13
|
|
movq -24(%rsi),%r12
|
|
.cfi_restore %r12
|
|
movq -16(%rsi),%rbp
|
|
.cfi_restore %rbp
|
|
movq -8(%rsi),%rbx
|
|
.cfi_restore %rbx
|
|
leaq (%rsi),%rsp
|
|
.cfi_def_cfa_register %rsp
|
|
.Lepilogue_avx2:
|
|
.byte 0xf3,0xc3
|
|
.cfi_endproc
|
|
.size aesni_cbc_sha256_enc_avx2,.-aesni_cbc_sha256_enc_avx2
|
|
.type aesni_cbc_sha256_enc_shaext,@function
.align 32
/*
 * SHA-NI + AES-NI "stitched" path: CBC-encrypts data while simultaneously
 * SHA-256 hashing a (separate) input stream, one 64-byte SHA block — i.e.
 * four 16-byte AES blocks — per loop iteration.
 *
 * Register roles (SysV AMD64, inferred from the uses below):
 *   %rdi     plaintext to CBC-encrypt (read at 0/16/32/48(%rdi) per iteration)
 *   %rsi     ciphertext output; converted to an out-in delta so stores use
 *            (%rsi,%rdi,1)
 *   %rdx     iteration count, in 64-byte units (decq %rdx / jnz)
 *   %rcx     AES key schedule; rounds count read from 240(%rcx)
 *   %r8      16-byte CBC IV; final ciphertext block written back on exit
 *   %r9      SHA-256 state (8 x u32); updated in place on exit
 *   8(%rsp)  7th argument: pointer to the data being hashed (-> %r10)
 * Clobbers: %rax, %r10, %r11, %xmm0-%xmm15, flags.  No stack frame; leaf.
 */
aesni_cbc_sha256_enc_shaext:
movq 8(%rsp),%r10               /* r10 = in0, the stream to hash */
leaq K256+128(%rip),%rax        /* rax = K256 biased by +128 so round
                                   constants sit at -128..+383(%rax) */
movdqu (%r9),%xmm1              /* load SHA-256 state: h[0..3] */
movdqu 16(%r9),%xmm2            /* h[4..7] */
movdqa 512-128(%rax),%xmm3      /* pshufb control from K256+512; presumably
                                   the big-endian byte-swap mask stored after
                                   the round constants — confirm in table */

movl 240(%rcx),%r11d            /* r11d = number of AES rounds (10/12/14) */
subq %rdi,%rsi                  /* rsi = out - in; output addressed as
                                   (%rsi,%rdi,1) from here on */
movups (%rcx),%xmm15            /* xmm15 = round key 0 */
movups (%r8),%xmm6              /* xmm6 = IV (CBC chaining value) */
movups 16(%rcx),%xmm4           /* xmm4 = round key 1 */
leaq 112(%rcx),%rcx             /* bias key ptr: keys now at -80..112(%rcx) */

/*
 * Convert the linear state words into the (ABEF, CDGH) register layout that
 * sha256rnds2 operates on (see Intel SHA extensions documentation).
 */
pshufd $0x1b,%xmm1,%xmm0
pshufd $0xb1,%xmm1,%xmm1
pshufd $0x1b,%xmm2,%xmm2
movdqa %xmm3,%xmm7              /* keep shuffle mask in xmm7 across the loop */
.byte 102,15,58,15,202,8        /* palignr $8,%xmm2,%xmm1 */
punpcklqdq %xmm0,%xmm2

jmp .Loop_shaext

.align 16
/*
 * Main loop.  Pattern repeated throughout: xmm0 = K256[i] + message words,
 * then two sha256rnds2 (.byte 15,56,203,209 / 15,56,203,202) consume its
 * low/high halves; sha256msg1/sha256msg2 (.byte 69,15,56,204,* /
 * 69,15,56,205,*) plus palignr/paddd run the message schedule for
 * xmm10..xmm13; AES rounds (aesenc) are interleaved for latency hiding,
 * ping-ponging round keys through xmm4/xmm5.
 */
.Loop_shaext:
movdqu (%r10),%xmm10            /* load 64 bytes of data to hash */
movdqu 16(%r10),%xmm11
movdqu 32(%r10),%xmm12
.byte 102,68,15,56,0,211        /* pshufb %xmm3,%xmm10: byte-swap words */
movdqu 48(%r10),%xmm13

movdqa 0-128(%rax),%xmm0        /* K256[0..3] */
paddd %xmm10,%xmm0
.byte 102,68,15,56,0,219        /* pshufb %xmm3,%xmm11 */
movdqa %xmm2,%xmm9              /* snapshot state for final feed-forward add */
movdqa %xmm1,%xmm8
movups 0(%rdi),%xmm14           /* AES block 0 of 4 */
xorps %xmm15,%xmm14             /* plaintext ^ round key 0 ...            */
xorps %xmm14,%xmm6              /* ... ^ chaining value: CBC + AddRoundKey */
movups -80(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,209             /* sha256rnds2 %xmm1,%xmm2 */
pshufd $0x0e,%xmm0,%xmm0        /* move upper two K+W dwords into place */
movups -64(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,202             /* sha256rnds2 %xmm2,%xmm1 */

movdqa 32-128(%rax),%xmm0       /* K256[4..7] */
paddd %xmm11,%xmm0
.byte 102,68,15,56,0,227        /* pshufb %xmm3,%xmm12 */
leaq 64(%r10),%r10              /* advance hash input */
movups -48(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movups -32(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,202

movdqa 64-128(%rax),%xmm0       /* K256[8..11] */
paddd %xmm12,%xmm0
.byte 102,68,15,56,0,235        /* pshufb %xmm3,%xmm13 */
.byte 69,15,56,204,211          /* sha256msg1 %xmm11,%xmm10 */
movups -16(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm13,%xmm3
.byte 102,65,15,58,15,220,4     /* palignr $4,%xmm12,%xmm3 */
paddd %xmm3,%xmm10
movups 0(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,202

movdqa 96-128(%rax),%xmm0       /* K256[12..15] */
paddd %xmm13,%xmm0
.byte 69,15,56,205,213          /* sha256msg2 %xmm13,%xmm10 */
.byte 69,15,56,204,220          /* sha256msg1 %xmm12,%xmm11 */
movups 16(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movups 32(%rcx),%xmm4
aesenc %xmm5,%xmm6
movdqa %xmm10,%xmm3
.byte 102,65,15,58,15,221,4     /* palignr $4,%xmm13,%xmm3 */
paddd %xmm3,%xmm11
.byte 15,56,203,202
movdqa 128-128(%rax),%xmm0      /* K256[16..19] */
paddd %xmm10,%xmm0
.byte 69,15,56,205,218          /* sha256msg2 %xmm10,%xmm11 */
.byte 69,15,56,204,229          /* sha256msg1 %xmm13,%xmm12 */
movups 48(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm11,%xmm3
.byte 102,65,15,58,15,218,4     /* palignr $4,%xmm10,%xmm3 */
paddd %xmm3,%xmm12
/*
 * AES tail for block 0: run the 9th round, then 2 extra rounds for AES-192
 * (r11d == 12) and 2 more for AES-256 (r11d == 14); jb takes the AES-128
 * case (r11d == 10 < 11), je the AES-192 case.
 */
cmpl $11,%r11d
jb .Laesenclast1
movups 64(%rcx),%xmm4
aesenc %xmm5,%xmm6
movups 80(%rcx),%xmm5
aesenc %xmm4,%xmm6
je .Laesenclast1
movups 96(%rcx),%xmm4
aesenc %xmm5,%xmm6
movups 112(%rcx),%xmm5
aesenc %xmm4,%xmm6
.Laesenclast1:
aesenclast %xmm5,%xmm6          /* xmm6 = ciphertext block 0 */
movups 16-112(%rcx),%xmm4       /* reload round key 1 (rcx still biased) */
nop
.byte 15,56,203,202
movups 16(%rdi),%xmm14          /* AES block 1: start next CBC block */
xorps %xmm15,%xmm14
movups %xmm6,0(%rsi,%rdi,1)     /* store ciphertext block 0 */
xorps %xmm14,%xmm6              /* chain: (plain1 ^ rk0) ^ cipher0 */
movups -80(%rcx),%xmm5
aesenc %xmm4,%xmm6
movdqa 160-128(%rax),%xmm0      /* K256[20..23] */
paddd %xmm11,%xmm0
.byte 69,15,56,205,227          /* sha256msg2 %xmm11,%xmm12 */
.byte 69,15,56,204,234          /* sha256msg1 %xmm10,%xmm13 */
movups -64(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm12,%xmm3
.byte 102,65,15,58,15,219,4     /* palignr $4,%xmm11,%xmm3 */
paddd %xmm3,%xmm13
movups -48(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,202
movdqa 192-128(%rax),%xmm0      /* K256[24..27] */
paddd %xmm12,%xmm0
.byte 69,15,56,205,236          /* sha256msg2 %xmm12,%xmm13 */
.byte 69,15,56,204,211          /* sha256msg1 %xmm11,%xmm10 */
movups -32(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm13,%xmm3
.byte 102,65,15,58,15,220,4     /* palignr $4,%xmm12,%xmm3 */
paddd %xmm3,%xmm10
movups -16(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,202
movdqa 224-128(%rax),%xmm0      /* K256[28..31] */
paddd %xmm13,%xmm0
.byte 69,15,56,205,213          /* sha256msg2 %xmm13,%xmm10 */
.byte 69,15,56,204,220          /* sha256msg1 %xmm12,%xmm11 */
movups 0(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm10,%xmm3
.byte 102,65,15,58,15,221,4     /* palignr $4,%xmm13,%xmm3 */
paddd %xmm3,%xmm11
movups 16(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,202
movdqa 256-128(%rax),%xmm0      /* K256[32..35] */
paddd %xmm10,%xmm0
.byte 69,15,56,205,218          /* sha256msg2 %xmm10,%xmm11 */
.byte 69,15,56,204,229          /* sha256msg1 %xmm13,%xmm12 */
movups 32(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm11,%xmm3
.byte 102,65,15,58,15,218,4     /* palignr $4,%xmm10,%xmm3 */
paddd %xmm3,%xmm12
movups 48(%rcx),%xmm5
aesenc %xmm4,%xmm6
/* AES tail for block 1 — same 10/12/14-round dispatch as above */
cmpl $11,%r11d
jb .Laesenclast2
movups 64(%rcx),%xmm4
aesenc %xmm5,%xmm6
movups 80(%rcx),%xmm5
aesenc %xmm4,%xmm6
je .Laesenclast2
movups 96(%rcx),%xmm4
aesenc %xmm5,%xmm6
movups 112(%rcx),%xmm5
aesenc %xmm4,%xmm6
.Laesenclast2:
aesenclast %xmm5,%xmm6          /* xmm6 = ciphertext block 1 */
movups 16-112(%rcx),%xmm4
nop
.byte 15,56,203,202
movups 32(%rdi),%xmm14          /* AES block 2 */
xorps %xmm15,%xmm14
movups %xmm6,16(%rsi,%rdi,1)    /* store ciphertext block 1 */
xorps %xmm14,%xmm6
movups -80(%rcx),%xmm5
aesenc %xmm4,%xmm6
movdqa 288-128(%rax),%xmm0      /* K256[36..39] */
paddd %xmm11,%xmm0
.byte 69,15,56,205,227          /* sha256msg2 %xmm11,%xmm12 */
.byte 69,15,56,204,234          /* sha256msg1 %xmm10,%xmm13 */
movups -64(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm12,%xmm3
.byte 102,65,15,58,15,219,4     /* palignr $4,%xmm11,%xmm3 */
paddd %xmm3,%xmm13
movups -48(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,202
movdqa 320-128(%rax),%xmm0      /* K256[40..43] */
paddd %xmm12,%xmm0
.byte 69,15,56,205,236          /* sha256msg2 %xmm12,%xmm13 */
.byte 69,15,56,204,211          /* sha256msg1 %xmm11,%xmm10 */
movups -32(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm13,%xmm3
.byte 102,65,15,58,15,220,4     /* palignr $4,%xmm12,%xmm3 */
paddd %xmm3,%xmm10
movups -16(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,202
movdqa 352-128(%rax),%xmm0      /* K256[44..47] */
paddd %xmm13,%xmm0
.byte 69,15,56,205,213          /* sha256msg2 %xmm13,%xmm10 */
.byte 69,15,56,204,220          /* sha256msg1 %xmm12,%xmm11 */
movups 0(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm10,%xmm3
.byte 102,65,15,58,15,221,4     /* palignr $4,%xmm13,%xmm3 */
paddd %xmm3,%xmm11
movups 16(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,202
movdqa 384-128(%rax),%xmm0      /* K256[48..51] */
paddd %xmm10,%xmm0
.byte 69,15,56,205,218          /* sha256msg2 %xmm10,%xmm11 */
.byte 69,15,56,204,229          /* sha256msg1 %xmm13,%xmm12 */
movups 32(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm11,%xmm3
.byte 102,65,15,58,15,218,4     /* palignr $4,%xmm10,%xmm3 */
paddd %xmm3,%xmm12
movups 48(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,202
movdqa 416-128(%rax),%xmm0      /* K256[52..55] */
paddd %xmm11,%xmm0
.byte 69,15,56,205,227          /* sha256msg2 %xmm11,%xmm12 */
.byte 69,15,56,204,234          /* sha256msg1 %xmm10,%xmm13 */
/* AES tail for block 2 */
cmpl $11,%r11d
jb .Laesenclast3
movups 64(%rcx),%xmm4
aesenc %xmm5,%xmm6
movups 80(%rcx),%xmm5
aesenc %xmm4,%xmm6
je .Laesenclast3
movups 96(%rcx),%xmm4
aesenc %xmm5,%xmm6
movups 112(%rcx),%xmm5
aesenc %xmm4,%xmm6
.Laesenclast3:
aesenclast %xmm5,%xmm6          /* xmm6 = ciphertext block 2 */
movups 16-112(%rcx),%xmm4
nop
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm12,%xmm3
.byte 102,65,15,58,15,219,4     /* palignr $4,%xmm11,%xmm3 */
paddd %xmm3,%xmm13
movups 48(%rdi),%xmm14          /* AES block 3 (last of this 64-byte unit) */
xorps %xmm15,%xmm14
movups %xmm6,32(%rsi,%rdi,1)    /* store ciphertext block 2 */
xorps %xmm14,%xmm6
movups -80(%rcx),%xmm5
aesenc %xmm4,%xmm6
movups -64(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,202

movdqa 448-128(%rax),%xmm0      /* K256[56..59] */
paddd %xmm12,%xmm0
.byte 69,15,56,205,236          /* sha256msg2 %xmm12,%xmm13 */
movdqa %xmm7,%xmm3              /* restore shuffle mask for next iteration */
movups -48(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movups -32(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,202

movdqa 480-128(%rax),%xmm0      /* K256[60..63]: final SHA rounds */
paddd %xmm13,%xmm0
movups -16(%rcx),%xmm5
aesenc %xmm4,%xmm6
movups 0(%rcx),%xmm4
aesenc %xmm5,%xmm6
.byte 15,56,203,209
pshufd $0x0e,%xmm0,%xmm0
movups 16(%rcx),%xmm5
aesenc %xmm4,%xmm6
.byte 15,56,203,202

movups 32(%rcx),%xmm4
aesenc %xmm5,%xmm6
movups 48(%rcx),%xmm5
aesenc %xmm4,%xmm6
/* AES tail for block 3 */
cmpl $11,%r11d
jb .Laesenclast4
movups 64(%rcx),%xmm4
aesenc %xmm5,%xmm6
movups 80(%rcx),%xmm5
aesenc %xmm4,%xmm6
je .Laesenclast4
movups 96(%rcx),%xmm4
aesenc %xmm5,%xmm6
movups 112(%rcx),%xmm5
aesenc %xmm4,%xmm6
.Laesenclast4:
aesenclast %xmm5,%xmm6          /* xmm6 = ciphertext block 3 */
movups 16-112(%rcx),%xmm4
nop

paddd %xmm9,%xmm2               /* feed-forward: state += saved state */
paddd %xmm8,%xmm1

decq %rdx                       /* one 64-byte unit done */
movups %xmm6,48(%rsi,%rdi,1)    /* store ciphertext block 3 */
leaq 64(%rdi),%rdi              /* advance plaintext/ciphertext pointers */
jnz .Loop_shaext

/* Undo the ABEF/CDGH layout back to linear word order before storing. */
pshufd $0xb1,%xmm2,%xmm2
pshufd $0x1b,%xmm1,%xmm3
pshufd $0xb1,%xmm1,%xmm1
punpckhqdq %xmm2,%xmm1
.byte 102,15,58,15,211,8        /* palignr $8,%xmm3,%xmm2 */

movups %xmm6,(%r8)              /* write back last ciphertext block as IV */
movdqu %xmm1,(%r9)              /* write back updated SHA-256 state */
movdqu %xmm2,16(%r9)
.byte 0xf3,0xc3                 /* rep; ret */
.size aesni_cbc_sha256_enc_shaext,.-aesni_cbc_sha256_enc_shaext
|