/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from chacha-x86.pl. */
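/*
 * ChaCha20 for 32-bit x86, generated by the CRYPTOGAMS chacha-x86.pl script.
 * Three implementations are emitted and selected at run time from the CPU
 * capability vector OPENSSL_ia32cap_P: a scalar path (.L001x86), an SSSE3
 * path that uses pshufb for the 16- and 8-bit rotates, and an AMD XOP path
 * that uses vprotd.  The whole file is duplicated under #ifdef PIC / #else
 * so that position-independent and absolute-addressing builds each get a
 * matching copy.
 *
 * The C prototype served by ChaCha20_ctr32 is assumed (per OpenSSL) to be:
 *
 *   void ChaCha20_ctr32(unsigned char *out, const unsigned char *inp,
 *                       size_t len, const unsigned int key[8],
 *                       const unsigned int counter[4]);
 */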
#ifdef PIC
.text
.globl ChaCha20_ctr32
.type ChaCha20_ctr32,@function
.align 16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
xorl %eax,%eax
cmpl 28(%esp),%eax
je .L000no_data
call .Lpic_point
.Lpic_point:
popl %eax
leal OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
testl $16777216,(%ebp)
jz .L001x86
testl $512,4(%ebp)
jz .L001x86
jmp .Lssse3_shortcut
.L001x86:
movl 32(%esp),%esi
movl 36(%esp),%edi
subl $132,%esp
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%edx
movl %eax,80(%esp)
movl %ebx,84(%esp)
movl %ecx,88(%esp)
movl %edx,92(%esp)
movl 16(%esi),%eax
movl 20(%esi),%ebx
movl 24(%esi),%ecx
movl 28(%esi),%edx
movl %eax,96(%esp)
movl %ebx,100(%esp)
movl %ecx,104(%esp)
movl %edx,108(%esp)
movl (%edi),%eax
movl 4(%edi),%ebx
movl 8(%edi),%ecx
movl 12(%edi),%edx
subl $1,%eax
movl %eax,112(%esp)
movl %ebx,116(%esp)
movl %ecx,120(%esp)
movl %edx,124(%esp)
jmp .L002entry
.align 16
.L003outer_loop:
movl %ebx,156(%esp)
movl %eax,152(%esp)
movl %ecx,160(%esp)
.L002entry:
movl $1634760805,%eax
movl $857760878,4(%esp)
movl $2036477234,8(%esp)
movl $1797285236,12(%esp)
movl 84(%esp),%ebx
movl 88(%esp),%ebp
movl 104(%esp),%ecx
movl 108(%esp),%esi
movl 116(%esp),%edx
movl 120(%esp),%edi
movl %ebx,20(%esp)
movl %ebp,24(%esp)
movl %ecx,40(%esp)
movl %esi,44(%esp)
movl %edx,52(%esp)
movl %edi,56(%esp)
movl 92(%esp),%ebx
movl 124(%esp),%edi
movl 112(%esp),%edx
movl 80(%esp),%ebp
movl 96(%esp),%ecx
movl 100(%esp),%esi
addl $1,%edx
movl %ebx,28(%esp)
movl %edi,60(%esp)
movl %edx,112(%esp)
movl $10,%ebx
jmp .L004loop
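/*
 * Scalar double-round loop: %ebx counts 10 iterations, each performing the
 * four column quarter-rounds followed by the four diagonal quarter-rounds
 * (20 ChaCha rounds in total).  The 16-word working state lives at
 * 0(%esp)..60(%esp); a handful of words at a time are cached in registers.
 */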
.align 16
.L004loop:
addl %ebp,%eax
movl %ebx,128(%esp)
movl %ebp,%ebx
xorl %eax,%edx
roll $16,%edx
addl %edx,%ecx
xorl %ecx,%ebx
movl 52(%esp),%edi
roll $12,%ebx
movl 20(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,(%esp)
roll $8,%edx
movl 4(%esp),%eax
addl %edx,%ecx
movl %edx,48(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
movl %ecx,32(%esp)
roll $16,%edi
movl %ebx,16(%esp)
addl %edi,%esi
movl 40(%esp),%ecx
xorl %esi,%ebp
movl 56(%esp),%edx
roll $12,%ebp
movl 24(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,4(%esp)
roll $8,%edi
movl 8(%esp),%eax
addl %edi,%esi
movl %edi,52(%esp)
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
movl %esi,36(%esp)
roll $16,%edx
movl %ebp,20(%esp)
addl %edx,%ecx
movl 44(%esp),%esi
xorl %ecx,%ebx
movl 60(%esp),%edi
roll $12,%ebx
movl 28(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,8(%esp)
roll $8,%edx
movl 12(%esp),%eax
addl %edx,%ecx
movl %edx,56(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
roll $16,%edi
movl %ebx,24(%esp)
addl %edi,%esi
xorl %esi,%ebp
roll $12,%ebp
movl 20(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,12(%esp)
roll $8,%edi
movl (%esp),%eax
addl %edi,%esi
movl %edi,%edx
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
roll $16,%edx
movl %ebp,28(%esp)
addl %edx,%ecx
xorl %ecx,%ebx
movl 48(%esp),%edi
roll $12,%ebx
movl 24(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,(%esp)
roll $8,%edx
movl 4(%esp),%eax
addl %edx,%ecx
movl %edx,60(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
movl %ecx,40(%esp)
roll $16,%edi
movl %ebx,20(%esp)
addl %edi,%esi
movl 32(%esp),%ecx
xorl %esi,%ebp
movl 52(%esp),%edx
roll $12,%ebp
movl 28(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,4(%esp)
roll $8,%edi
movl 8(%esp),%eax
addl %edi,%esi
movl %edi,48(%esp)
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
movl %esi,44(%esp)
roll $16,%edx
movl %ebp,24(%esp)
addl %edx,%ecx
movl 36(%esp),%esi
xorl %ecx,%ebx
movl 56(%esp),%edi
roll $12,%ebx
movl 16(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,8(%esp)
roll $8,%edx
movl 12(%esp),%eax
addl %edx,%ecx
movl %edx,52(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
roll $16,%edi
movl %ebx,28(%esp)
addl %edi,%esi
xorl %esi,%ebp
movl 48(%esp),%edx
roll $12,%ebp
movl 128(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,12(%esp)
roll $8,%edi
movl (%esp),%eax
addl %edi,%esi
movl %edi,56(%esp)
xorl %esi,%ebp
roll $7,%ebp
decl %ebx
jnz .L004loop
movl 160(%esp),%ebx
addl $1634760805,%eax
addl 80(%esp),%ebp
addl 96(%esp),%ecx
addl 100(%esp),%esi
cmpl $64,%ebx
jb .L005tail
movl 156(%esp),%ebx
addl 112(%esp),%edx
addl 120(%esp),%edi
xorl (%ebx),%eax
xorl 16(%ebx),%ebp
movl %eax,(%esp)
movl 152(%esp),%eax
xorl 32(%ebx),%ecx
xorl 36(%ebx),%esi
xorl 48(%ebx),%edx
xorl 56(%ebx),%edi
movl %ebp,16(%eax)
movl %ecx,32(%eax)
movl %esi,36(%eax)
movl %edx,48(%eax)
movl %edi,56(%eax)
movl 4(%esp),%ebp
movl 8(%esp),%ecx
movl 12(%esp),%esi
movl 20(%esp),%edx
movl 24(%esp),%edi
addl $857760878,%ebp
addl $2036477234,%ecx
addl $1797285236,%esi
addl 84(%esp),%edx
addl 88(%esp),%edi
xorl 4(%ebx),%ebp
xorl 8(%ebx),%ecx
xorl 12(%ebx),%esi
xorl 20(%ebx),%edx
xorl 24(%ebx),%edi
movl %ebp,4(%eax)
movl %ecx,8(%eax)
movl %esi,12(%eax)
movl %edx,20(%eax)
movl %edi,24(%eax)
movl 28(%esp),%ebp
movl 40(%esp),%ecx
movl 44(%esp),%esi
movl 52(%esp),%edx
movl 60(%esp),%edi
addl 92(%esp),%ebp
addl 104(%esp),%ecx
addl 108(%esp),%esi
addl 116(%esp),%edx
addl 124(%esp),%edi
xorl 28(%ebx),%ebp
xorl 40(%ebx),%ecx
xorl 44(%ebx),%esi
xorl 52(%ebx),%edx
xorl 60(%ebx),%edi
leal 64(%ebx),%ebx
movl %ebp,28(%eax)
movl (%esp),%ebp
movl %ecx,40(%eax)
movl 160(%esp),%ecx
movl %esi,44(%eax)
movl %edx,52(%eax)
movl %edi,60(%eax)
movl %ebp,(%eax)
leal 64(%eax),%eax
subl $64,%ecx
jnz .L003outer_loop
jmp .L006done
.L005tail:
addl 112(%esp),%edx
addl 120(%esp),%edi
movl %eax,(%esp)
movl %ebp,16(%esp)
movl %ecx,32(%esp)
movl %esi,36(%esp)
movl %edx,48(%esp)
movl %edi,56(%esp)
movl 4(%esp),%ebp
movl 8(%esp),%ecx
movl 12(%esp),%esi
movl 20(%esp),%edx
movl 24(%esp),%edi
addl $857760878,%ebp
addl $2036477234,%ecx
addl $1797285236,%esi
addl 84(%esp),%edx
addl 88(%esp),%edi
movl %ebp,4(%esp)
movl %ecx,8(%esp)
movl %esi,12(%esp)
movl %edx,20(%esp)
movl %edi,24(%esp)
movl 28(%esp),%ebp
movl 40(%esp),%ecx
movl 44(%esp),%esi
movl 52(%esp),%edx
movl 60(%esp),%edi
addl 92(%esp),%ebp
addl 104(%esp),%ecx
addl 108(%esp),%esi
addl 116(%esp),%edx
addl 124(%esp),%edi
movl %ebp,28(%esp)
movl 156(%esp),%ebp
movl %ecx,40(%esp)
movl 152(%esp),%ecx
movl %esi,44(%esp)
xorl %esi,%esi
movl %edx,52(%esp)
movl %edi,60(%esp)
xorl %eax,%eax
xorl %edx,%edx
.L007tail_loop:
movb (%esi,%ebp,1),%al
movb (%esp,%esi,1),%dl
leal 1(%esi),%esi
xorb %dl,%al
movb %al,-1(%ecx,%esi,1)
decl %ebx
jnz .L007tail_loop
.L006done:
addl $132,%esp
.L000no_data:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
.globl ChaCha20_ssse3
.type ChaCha20_ssse3,@function
.align 16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
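/*
 * .Lssse3_shortcut is normally reached from the ChaCha20_ctr32 dispatcher
 * above, which leaves %eax pointing at .Lpic_point (for PC-relative data
 * addressing) and %ebp pointing at OPENSSL_ia32cap_P, whose second word is
 * tested below to decide whether to divert to the XOP code.
 */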
.Lssse3_shortcut:
testl $2048,4(%ebp)
jnz .Lxop_shortcut
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%ecx
movl 32(%esp),%edx
movl 36(%esp),%ebx
movl %esp,%ebp
subl $524,%esp
andl $-64,%esp
movl %ebp,512(%esp)
leal .Lssse3_data-.Lpic_point(%eax),%eax
movdqu (%ebx),%xmm3
.L0081x:
movdqa 32(%eax),%xmm0
movdqu (%edx),%xmm1
movdqu 16(%edx),%xmm2
movdqa (%eax),%xmm6
movdqa 16(%eax),%xmm7
movl %ebp,48(%esp)
movdqa %xmm0,(%esp)
movdqa %xmm1,16(%esp)
movdqa %xmm2,32(%esp)
movdqa %xmm3,48(%esp)
movl $10,%edx
jmp .L009loop1x
.align 16
.L010outer1x:
movdqa 80(%eax),%xmm3
movdqa (%esp),%xmm0
movdqa 16(%esp),%xmm1
movdqa 32(%esp),%xmm2
paddd 48(%esp),%xmm3
movl $10,%edx
movdqa %xmm3,48(%esp)
jmp .L009loop1x
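/*
 * SSSE3 double-round: the .byte 102,15,56,0,222 and 102,15,56,0,223
 * sequences are pshufb %xmm6,%xmm3 and pshufb %xmm7,%xmm3, using the
 * byte-shuffle masks at .Lssse3_data to rotate each 32-bit lane left by
 * 16 and by 8; the 12- and 7-bit rotates use psrld/pslld/por.
 */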
.align 16
.L009loop1x:
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,222
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $20,%xmm1
pslld $12,%xmm4
por %xmm4,%xmm1
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,223
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $25,%xmm1
pslld $7,%xmm4
por %xmm4,%xmm1
pshufd $78,%xmm2,%xmm2
pshufd $57,%xmm1,%xmm1
pshufd $147,%xmm3,%xmm3
nop
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,222
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $20,%xmm1
pslld $12,%xmm4
por %xmm4,%xmm1
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,223
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $25,%xmm1
pslld $7,%xmm4
por %xmm4,%xmm1
pshufd $78,%xmm2,%xmm2
pshufd $147,%xmm1,%xmm1
pshufd $57,%xmm3,%xmm3
decl %edx
jnz .L009loop1x
paddd (%esp),%xmm0
paddd 16(%esp),%xmm1
paddd 32(%esp),%xmm2
paddd 48(%esp),%xmm3
cmpl $64,%ecx
jb .L011tail
movdqu (%esi),%xmm4
movdqu 16(%esi),%xmm5
pxor %xmm4,%xmm0
movdqu 32(%esi),%xmm4
pxor %xmm5,%xmm1
movdqu 48(%esi),%xmm5
pxor %xmm4,%xmm2
pxor %xmm5,%xmm3
leal 64(%esi),%esi
movdqu %xmm0,(%edi)
movdqu %xmm1,16(%edi)
movdqu %xmm2,32(%edi)
movdqu %xmm3,48(%edi)
leal 64(%edi),%edi
subl $64,%ecx
jnz .L010outer1x
jmp .L012done
.L011tail:
movdqa %xmm0,(%esp)
movdqa %xmm1,16(%esp)
movdqa %xmm2,32(%esp)
movdqa %xmm3,48(%esp)
xorl %eax,%eax
xorl %edx,%edx
xorl %ebp,%ebp
.L013tail_loop:
movb (%esp,%ebp,1),%al
movb (%esi,%ebp,1),%dl
leal 1(%ebp),%ebp
xorb %dl,%al
movb %al,-1(%edi,%ebp,1)
decl %ecx
jnz .L013tail_loop
.L012done:
movl 512(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
.align 64
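/*
 * Constant table shared by the SSSE3 and XOP code: two pshufb masks
 * (rotate each dword left by 16 and by 8), the "expand 32-byte k" sigma
 * words, per-lane counter offsets 0..3, counter increments for the 4x and
 * 1x paths, and values used when falling back from the 4x to the 1x path.
 */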
.Lssse3_data:
.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long 1634760805,857760878,2036477234,1797285236
.long 0,1,2,3
.long 4,4,4,4
.long 1,0,0,0
.long 4,0,0,0
.long 0,-1,-1,-1
.align 64
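/* ASCII string "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>" */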
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
.globl ChaCha20_xop
.type ChaCha20_xop,@function
.align 16
ChaCha20_xop:
.L_ChaCha20_xop_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
.Lxop_shortcut:
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%ecx
movl 32(%esp),%edx
movl 36(%esp),%ebx
vzeroupper
movl %esp,%ebp
subl $524,%esp
andl $-64,%esp
movl %ebp,512(%esp)
leal .Lssse3_data-.Lpic_point(%eax),%eax
vmovdqu (%ebx),%xmm3
cmpl $256,%ecx
jb .L0141x
movl %edx,516(%esp)
movl %ebx,520(%esp)
subl $256,%ecx
leal 384(%esp),%ebp
vmovdqu (%edx),%xmm7
vpshufd $0,%xmm3,%xmm0
vpshufd $85,%xmm3,%xmm1
vpshufd $170,%xmm3,%xmm2
vpshufd $255,%xmm3,%xmm3
vpaddd 48(%eax),%xmm0,%xmm0
vpshufd $0,%xmm7,%xmm4
vpshufd $85,%xmm7,%xmm5
vpsubd 64(%eax),%xmm0,%xmm0
vpshufd $170,%xmm7,%xmm6
vpshufd $255,%xmm7,%xmm7
vmovdqa %xmm0,64(%ebp)
vmovdqa %xmm1,80(%ebp)
vmovdqa %xmm2,96(%ebp)
vmovdqa %xmm3,112(%ebp)
vmovdqu 16(%edx),%xmm3
vmovdqa %xmm4,-64(%ebp)
vmovdqa %xmm5,-48(%ebp)
vmovdqa %xmm6,-32(%ebp)
vmovdqa %xmm7,-16(%ebp)
vmovdqa 32(%eax),%xmm7
leal 128(%esp),%ebx
vpshufd $0,%xmm3,%xmm0
vpshufd $85,%xmm3,%xmm1
vpshufd $170,%xmm3,%xmm2
vpshufd $255,%xmm3,%xmm3
vpshufd $0,%xmm7,%xmm4
vpshufd $85,%xmm7,%xmm5
vpshufd $170,%xmm7,%xmm6
vpshufd $255,%xmm7,%xmm7
vmovdqa %xmm0,(%ebp)
vmovdqa %xmm1,16(%ebp)
vmovdqa %xmm2,32(%ebp)
vmovdqa %xmm3,48(%ebp)
vmovdqa %xmm4,-128(%ebp)
vmovdqa %xmm5,-112(%ebp)
vmovdqa %xmm6,-96(%ebp)
vmovdqa %xmm7,-80(%ebp)
leal 128(%esi),%esi
leal 128(%edi),%edi
jmp .L015outer_loop
.align 32
.L015outer_loop:
vmovdqa -112(%ebp),%xmm1
vmovdqa -96(%ebp),%xmm2
vmovdqa -80(%ebp),%xmm3
vmovdqa -48(%ebp),%xmm5
vmovdqa -32(%ebp),%xmm6
vmovdqa -16(%ebp),%xmm7
vmovdqa %xmm1,-112(%ebx)
vmovdqa %xmm2,-96(%ebx)
vmovdqa %xmm3,-80(%ebx)
vmovdqa %xmm5,-48(%ebx)
vmovdqa %xmm6,-32(%ebx)
vmovdqa %xmm7,-16(%ebx)
vmovdqa 32(%ebp),%xmm2
vmovdqa 48(%ebp),%xmm3
vmovdqa 64(%ebp),%xmm4
vmovdqa 80(%ebp),%xmm5
vmovdqa 96(%ebp),%xmm6
vmovdqa 112(%ebp),%xmm7
vpaddd 64(%eax),%xmm4,%xmm4
vmovdqa %xmm2,32(%ebx)
vmovdqa %xmm3,48(%ebx)
vmovdqa %xmm4,64(%ebx)
vmovdqa %xmm5,80(%ebx)
vmovdqa %xmm6,96(%ebx)
vmovdqa %xmm7,112(%ebx)
vmovdqa %xmm4,64(%ebp)
vmovdqa -128(%ebp),%xmm0
vmovdqa %xmm4,%xmm6
vmovdqa -64(%ebp),%xmm3
vmovdqa (%ebp),%xmm4
vmovdqa 16(%ebp),%xmm5
movl $10,%edx
nop
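/*
 * XOP 4x double-round: the .byte 143,232,120,194,... sequences encode
 * vprotd (XOP rotate-doubleword-by-immediate); the final byte is the
 * rotate count (16, 12, 8 or 7).  Four blocks are processed in parallel,
 * with the 16 four-lane state words spilled at -128(%ebx)..112(%ebx).
 */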
.align 32
.L016loop:
vpaddd %xmm3,%xmm0,%xmm0
vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,246,16
vpaddd %xmm6,%xmm4,%xmm4
vpxor %xmm4,%xmm3,%xmm2
vmovdqa -112(%ebx),%xmm1
.byte 143,232,120,194,210,12
vmovdqa -48(%ebx),%xmm3
vpaddd %xmm2,%xmm0,%xmm0
vmovdqa 80(%ebx),%xmm7
vpxor %xmm0,%xmm6,%xmm6
vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
vmovdqa %xmm0,-128(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa %xmm6,64(%ebx)
vpxor %xmm4,%xmm2,%xmm2
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
vmovdqa %xmm4,(%ebx)
.byte 143,232,120,194,255,16
vmovdqa %xmm2,-64(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa 32(%ebx),%xmm4
vpxor %xmm5,%xmm3,%xmm3
vmovdqa -96(%ebx),%xmm0
.byte 143,232,120,194,219,12
vmovdqa -32(%ebx),%xmm2
vpaddd %xmm3,%xmm1,%xmm1
vmovdqa 96(%ebx),%xmm6
vpxor %xmm1,%xmm7,%xmm7
vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
vmovdqa %xmm1,-112(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa %xmm7,80(%ebx)
vpxor %xmm5,%xmm3,%xmm3
vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,219,7
vmovdqa %xmm5,16(%ebx)
.byte 143,232,120,194,246,16
vmovdqa %xmm3,-48(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa 48(%ebx),%xmm5
vpxor %xmm4,%xmm2,%xmm2
vmovdqa -80(%ebx),%xmm1
.byte 143,232,120,194,210,12
vmovdqa -16(%ebx),%xmm3
vpaddd %xmm2,%xmm0,%xmm0
vmovdqa 112(%ebx),%xmm7
vpxor %xmm0,%xmm6,%xmm6
vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
vmovdqa %xmm0,-96(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa %xmm6,96(%ebx)
vpxor %xmm4,%xmm2,%xmm2
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
.byte 143,232,120,194,255,16
vmovdqa %xmm2,-32(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vpxor %xmm5,%xmm3,%xmm3
vmovdqa -128(%ebx),%xmm0
.byte 143,232,120,194,219,12
vmovdqa -48(%ebx),%xmm2
vpaddd %xmm3,%xmm1,%xmm1
vpxor %xmm1,%xmm7,%xmm7
vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
vmovdqa %xmm1,-80(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vpxor %xmm5,%xmm3,%xmm3
vpxor %xmm0,%xmm7,%xmm6
.byte 143,232,120,194,219,7
.byte 143,232,120,194,246,16
vmovdqa %xmm3,-16(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vpxor %xmm4,%xmm2,%xmm2
vmovdqa -112(%ebx),%xmm1
.byte 143,232,120,194,210,12
vmovdqa -32(%ebx),%xmm3
vpaddd %xmm2,%xmm0,%xmm0
vmovdqa 64(%ebx),%xmm7
vpxor %xmm0,%xmm6,%xmm6
vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
vmovdqa %xmm0,-128(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa %xmm6,112(%ebx)
vpxor %xmm4,%xmm2,%xmm2
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
vmovdqa %xmm4,32(%ebx)
.byte 143,232,120,194,255,16
vmovdqa %xmm2,-48(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa (%ebx),%xmm4
vpxor %xmm5,%xmm3,%xmm3
vmovdqa -96(%ebx),%xmm0
.byte 143,232,120,194,219,12
vmovdqa -16(%ebx),%xmm2
vpaddd %xmm3,%xmm1,%xmm1
vmovdqa 80(%ebx),%xmm6
vpxor %xmm1,%xmm7,%xmm7
vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
vmovdqa %xmm1,-112(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa %xmm7,64(%ebx)
vpxor %xmm5,%xmm3,%xmm3
vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,219,7
vmovdqa %xmm5,48(%ebx)
.byte 143,232,120,194,246,16
vmovdqa %xmm3,-32(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa 16(%ebx),%xmm5
vpxor %xmm4,%xmm2,%xmm2
vmovdqa -80(%ebx),%xmm1
.byte 143,232,120,194,210,12
vmovdqa -64(%ebx),%xmm3
vpaddd %xmm2,%xmm0,%xmm0
vmovdqa 96(%ebx),%xmm7
vpxor %xmm0,%xmm6,%xmm6
vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
vmovdqa %xmm0,-96(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa %xmm6,80(%ebx)
vpxor %xmm4,%xmm2,%xmm2
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
.byte 143,232,120,194,255,16
vmovdqa %xmm2,-16(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vpxor %xmm5,%xmm3,%xmm3
vmovdqa -128(%ebx),%xmm0
.byte 143,232,120,194,219,12
vpaddd %xmm3,%xmm1,%xmm1
vmovdqa 64(%ebx),%xmm6
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,255,8
vmovdqa %xmm1,-80(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa %xmm7,96(%ebx)
vpxor %xmm5,%xmm3,%xmm3
.byte 143,232,120,194,219,7
decl %edx
jnz .L016loop
vmovdqa %xmm3,-64(%ebx)
vmovdqa %xmm4,(%ebx)
vmovdqa %xmm5,16(%ebx)
vmovdqa %xmm6,64(%ebx)
vmovdqa %xmm7,96(%ebx)
vmovdqa -112(%ebx),%xmm1
vmovdqa -96(%ebx),%xmm2
vmovdqa -80(%ebx),%xmm3
vpaddd -128(%ebp),%xmm0,%xmm0
vpaddd -112(%ebp),%xmm1,%xmm1
vpaddd -96(%ebp),%xmm2,%xmm2
vpaddd -80(%ebp),%xmm3,%xmm3
vpunpckldq %xmm1,%xmm0,%xmm6
vpunpckldq %xmm3,%xmm2,%xmm7
vpunpckhdq %xmm1,%xmm0,%xmm0
vpunpckhdq %xmm3,%xmm2,%xmm2
vpunpcklqdq %xmm7,%xmm6,%xmm1
vpunpckhqdq %xmm7,%xmm6,%xmm6
vpunpcklqdq %xmm2,%xmm0,%xmm7
vpunpckhqdq %xmm2,%xmm0,%xmm3
vpxor -128(%esi),%xmm1,%xmm4
vpxor -64(%esi),%xmm6,%xmm5
vpxor (%esi),%xmm7,%xmm6
vpxor 64(%esi),%xmm3,%xmm7
leal 16(%esi),%esi
vmovdqa -64(%ebx),%xmm0
vmovdqa -48(%ebx),%xmm1
vmovdqa -32(%ebx),%xmm2
vmovdqa -16(%ebx),%xmm3
vmovdqu %xmm4,-128(%edi)
vmovdqu %xmm5,-64(%edi)
vmovdqu %xmm6,(%edi)
vmovdqu %xmm7,64(%edi)
leal 16(%edi),%edi
vpaddd -64(%ebp),%xmm0,%xmm0
vpaddd -48(%ebp),%xmm1,%xmm1
vpaddd -32(%ebp),%xmm2,%xmm2
vpaddd -16(%ebp),%xmm3,%xmm3
vpunpckldq %xmm1,%xmm0,%xmm6
vpunpckldq %xmm3,%xmm2,%xmm7
vpunpckhdq %xmm1,%xmm0,%xmm0
vpunpckhdq %xmm3,%xmm2,%xmm2
vpunpcklqdq %xmm7,%xmm6,%xmm1
vpunpckhqdq %xmm7,%xmm6,%xmm6
vpunpcklqdq %xmm2,%xmm0,%xmm7
vpunpckhqdq %xmm2,%xmm0,%xmm3
vpxor -128(%esi),%xmm1,%xmm4
vpxor -64(%esi),%xmm6,%xmm5
vpxor (%esi),%xmm7,%xmm6
vpxor 64(%esi),%xmm3,%xmm7
leal 16(%esi),%esi
vmovdqa (%ebx),%xmm0
vmovdqa 16(%ebx),%xmm1
vmovdqa 32(%ebx),%xmm2
vmovdqa 48(%ebx),%xmm3
vmovdqu %xmm4,-128(%edi)
vmovdqu %xmm5,-64(%edi)
vmovdqu %xmm6,(%edi)
vmovdqu %xmm7,64(%edi)
leal 16(%edi),%edi
vpaddd (%ebp),%xmm0,%xmm0
vpaddd 16(%ebp),%xmm1,%xmm1
vpaddd 32(%ebp),%xmm2,%xmm2
vpaddd 48(%ebp),%xmm3,%xmm3
vpunpckldq %xmm1,%xmm0,%xmm6
vpunpckldq %xmm3,%xmm2,%xmm7
vpunpckhdq %xmm1,%xmm0,%xmm0
vpunpckhdq %xmm3,%xmm2,%xmm2
vpunpcklqdq %xmm7,%xmm6,%xmm1
vpunpckhqdq %xmm7,%xmm6,%xmm6
vpunpcklqdq %xmm2,%xmm0,%xmm7
vpunpckhqdq %xmm2,%xmm0,%xmm3
vpxor -128(%esi),%xmm1,%xmm4
vpxor -64(%esi),%xmm6,%xmm5
vpxor (%esi),%xmm7,%xmm6
vpxor 64(%esi),%xmm3,%xmm7
leal 16(%esi),%esi
vmovdqa 64(%ebx),%xmm0
vmovdqa 80(%ebx),%xmm1
vmovdqa 96(%ebx),%xmm2
vmovdqa 112(%ebx),%xmm3
vmovdqu %xmm4,-128(%edi)
vmovdqu %xmm5,-64(%edi)
vmovdqu %xmm6,(%edi)
vmovdqu %xmm7,64(%edi)
leal 16(%edi),%edi
vpaddd 64(%ebp),%xmm0,%xmm0
vpaddd 80(%ebp),%xmm1,%xmm1
vpaddd 96(%ebp),%xmm2,%xmm2
vpaddd 112(%ebp),%xmm3,%xmm3
vpunpckldq %xmm1,%xmm0,%xmm6
vpunpckldq %xmm3,%xmm2,%xmm7
vpunpckhdq %xmm1,%xmm0,%xmm0
vpunpckhdq %xmm3,%xmm2,%xmm2
vpunpcklqdq %xmm7,%xmm6,%xmm1
vpunpckhqdq %xmm7,%xmm6,%xmm6
vpunpcklqdq %xmm2,%xmm0,%xmm7
vpunpckhqdq %xmm2,%xmm0,%xmm3
vpxor -128(%esi),%xmm1,%xmm4
vpxor -64(%esi),%xmm6,%xmm5
vpxor (%esi),%xmm7,%xmm6
vpxor 64(%esi),%xmm3,%xmm7
leal 208(%esi),%esi
vmovdqu %xmm4,-128(%edi)
vmovdqu %xmm5,-64(%edi)
vmovdqu %xmm6,(%edi)
vmovdqu %xmm7,64(%edi)
leal 208(%edi),%edi
subl $256,%ecx
jnc .L015outer_loop
addl $256,%ecx
jz .L017done
movl 520(%esp),%ebx
leal -128(%esi),%esi
movl 516(%esp),%edx
leal -128(%edi),%edi
vmovd 64(%ebp),%xmm2
vmovdqu (%ebx),%xmm3
vpaddd 96(%eax),%xmm2,%xmm2
vpand 112(%eax),%xmm3,%xmm3
vpor %xmm2,%xmm3,%xmm3
.L0141x:
vmovdqa 32(%eax),%xmm0
vmovdqu (%edx),%xmm1
vmovdqu 16(%edx),%xmm2
vmovdqa (%eax),%xmm6
vmovdqa 16(%eax),%xmm7
movl %ebp,48(%esp)
vmovdqa %xmm0,(%esp)
vmovdqa %xmm1,16(%esp)
vmovdqa %xmm2,32(%esp)
vmovdqa %xmm3,48(%esp)
movl $10,%edx
jmp .L018loop1x
.align 16
.L019outer1x:
vmovdqa 80(%eax),%xmm3
vmovdqa (%esp),%xmm0
vmovdqa 16(%esp),%xmm1
vmovdqa 32(%esp),%xmm2
vpaddd 48(%esp),%xmm3,%xmm3
movl $10,%edx
vmovdqa %xmm3,48(%esp)
jmp .L018loop1x
.align 16
.L018loop1x:
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,16
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,12
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,8
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,7
vpshufd $78,%xmm2,%xmm2
vpshufd $57,%xmm1,%xmm1
vpshufd $147,%xmm3,%xmm3
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,16
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,12
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,8
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,7
vpshufd $78,%xmm2,%xmm2
vpshufd $147,%xmm1,%xmm1
vpshufd $57,%xmm3,%xmm3
decl %edx
jnz .L018loop1x
vpaddd (%esp),%xmm0,%xmm0
vpaddd 16(%esp),%xmm1,%xmm1
vpaddd 32(%esp),%xmm2,%xmm2
vpaddd 48(%esp),%xmm3,%xmm3
cmpl $64,%ecx
jb .L020tail
vpxor (%esi),%xmm0,%xmm0
vpxor 16(%esi),%xmm1,%xmm1
vpxor 32(%esi),%xmm2,%xmm2
vpxor 48(%esi),%xmm3,%xmm3
leal 64(%esi),%esi
vmovdqu %xmm0,(%edi)
vmovdqu %xmm1,16(%edi)
vmovdqu %xmm2,32(%edi)
vmovdqu %xmm3,48(%edi)
leal 64(%edi),%edi
subl $64,%ecx
jnz .L019outer1x
jmp .L017done
.L020tail:
vmovdqa %xmm0,(%esp)
vmovdqa %xmm1,16(%esp)
vmovdqa %xmm2,32(%esp)
vmovdqa %xmm3,48(%esp)
xorl %eax,%eax
xorl %edx,%edx
xorl %ebp,%ebp
.L021tail_loop:
movb (%esp,%ebp,1),%al
movb (%esi,%ebp,1),%dl
leal 1(%ebp),%ebp
xorb %dl,%al
movb %al,-1(%edi,%ebp,1)
decl %ecx
jnz .L021tail_loop
.L017done:
vzeroupper
movl 512(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size ChaCha20_xop,.-.L_ChaCha20_xop_begin
.comm OPENSSL_ia32cap_P,16,4
#else
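/*
 * Non-PIC build of the same three routines.  The code below mirrors the
 * PIC variant above; the notable difference is that OPENSSL_ia32cap_P is
 * referenced by its absolute address instead of PC-relative to .Lpic_point.
 */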
.text
.globl ChaCha20_ctr32
.type ChaCha20_ctr32,@function
.align 16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
xorl %eax,%eax
cmpl 28(%esp),%eax
je .L000no_data
call .Lpic_point
.Lpic_point:
popl %eax
leal OPENSSL_ia32cap_P,%ebp
testl $16777216,(%ebp)
jz .L001x86
testl $512,4(%ebp)
jz .L001x86
jmp .Lssse3_shortcut
.L001x86:
movl 32(%esp),%esi
movl 36(%esp),%edi
subl $132,%esp
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%edx
movl %eax,80(%esp)
movl %ebx,84(%esp)
movl %ecx,88(%esp)
movl %edx,92(%esp)
movl 16(%esi),%eax
movl 20(%esi),%ebx
movl 24(%esi),%ecx
movl 28(%esi),%edx
movl %eax,96(%esp)
movl %ebx,100(%esp)
movl %ecx,104(%esp)
movl %edx,108(%esp)
movl (%edi),%eax
movl 4(%edi),%ebx
movl 8(%edi),%ecx
movl 12(%edi),%edx
subl $1,%eax
movl %eax,112(%esp)
movl %ebx,116(%esp)
movl %ecx,120(%esp)
movl %edx,124(%esp)
jmp .L002entry
.align 16
.L003outer_loop:
movl %ebx,156(%esp)
movl %eax,152(%esp)
movl %ecx,160(%esp)
.L002entry:
movl $1634760805,%eax
movl $857760878,4(%esp)
movl $2036477234,8(%esp)
movl $1797285236,12(%esp)
movl 84(%esp),%ebx
movl 88(%esp),%ebp
movl 104(%esp),%ecx
movl 108(%esp),%esi
movl 116(%esp),%edx
movl 120(%esp),%edi
movl %ebx,20(%esp)
movl %ebp,24(%esp)
movl %ecx,40(%esp)
movl %esi,44(%esp)
movl %edx,52(%esp)
movl %edi,56(%esp)
movl 92(%esp),%ebx
movl 124(%esp),%edi
movl 112(%esp),%edx
movl 80(%esp),%ebp
movl 96(%esp),%ecx
movl 100(%esp),%esi
addl $1,%edx
movl %ebx,28(%esp)
movl %edi,60(%esp)
movl %edx,112(%esp)
movl $10,%ebx
jmp .L004loop
.align 16
.L004loop:
addl %ebp,%eax
movl %ebx,128(%esp)
movl %ebp,%ebx
xorl %eax,%edx
roll $16,%edx
addl %edx,%ecx
xorl %ecx,%ebx
movl 52(%esp),%edi
roll $12,%ebx
movl 20(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,(%esp)
roll $8,%edx
movl 4(%esp),%eax
addl %edx,%ecx
movl %edx,48(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
movl %ecx,32(%esp)
roll $16,%edi
movl %ebx,16(%esp)
addl %edi,%esi
movl 40(%esp),%ecx
xorl %esi,%ebp
movl 56(%esp),%edx
roll $12,%ebp
movl 24(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,4(%esp)
roll $8,%edi
movl 8(%esp),%eax
addl %edi,%esi
movl %edi,52(%esp)
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
movl %esi,36(%esp)
roll $16,%edx
movl %ebp,20(%esp)
addl %edx,%ecx
movl 44(%esp),%esi
xorl %ecx,%ebx
movl 60(%esp),%edi
roll $12,%ebx
movl 28(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,8(%esp)
roll $8,%edx
movl 12(%esp),%eax
addl %edx,%ecx
movl %edx,56(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
roll $16,%edi
movl %ebx,24(%esp)
addl %edi,%esi
xorl %esi,%ebp
roll $12,%ebp
movl 20(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,12(%esp)
roll $8,%edi
movl (%esp),%eax
addl %edi,%esi
movl %edi,%edx
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
roll $16,%edx
movl %ebp,28(%esp)
addl %edx,%ecx
xorl %ecx,%ebx
movl 48(%esp),%edi
roll $12,%ebx
movl 24(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,(%esp)
roll $8,%edx
movl 4(%esp),%eax
addl %edx,%ecx
movl %edx,60(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
movl %ecx,40(%esp)
roll $16,%edi
movl %ebx,20(%esp)
addl %edi,%esi
movl 32(%esp),%ecx
xorl %esi,%ebp
movl 52(%esp),%edx
roll $12,%ebp
movl 28(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,4(%esp)
roll $8,%edi
movl 8(%esp),%eax
addl %edi,%esi
movl %edi,48(%esp)
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
movl %esi,44(%esp)
roll $16,%edx
movl %ebp,24(%esp)
addl %edx,%ecx
movl 36(%esp),%esi
xorl %ecx,%ebx
movl 56(%esp),%edi
roll $12,%ebx
movl 16(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,8(%esp)
roll $8,%edx
movl 12(%esp),%eax
addl %edx,%ecx
movl %edx,52(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
roll $16,%edi
movl %ebx,28(%esp)
addl %edi,%esi
xorl %esi,%ebp
movl 48(%esp),%edx
roll $12,%ebp
movl 128(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,12(%esp)
roll $8,%edi
movl (%esp),%eax
addl %edi,%esi
movl %edi,56(%esp)
xorl %esi,%ebp
roll $7,%ebp
decl %ebx
jnz .L004loop
movl 160(%esp),%ebx
addl $1634760805,%eax
addl 80(%esp),%ebp
addl 96(%esp),%ecx
addl 100(%esp),%esi
cmpl $64,%ebx
jb .L005tail
movl 156(%esp),%ebx
addl 112(%esp),%edx
addl 120(%esp),%edi
xorl (%ebx),%eax
xorl 16(%ebx),%ebp
movl %eax,(%esp)
movl 152(%esp),%eax
xorl 32(%ebx),%ecx
xorl 36(%ebx),%esi
xorl 48(%ebx),%edx
xorl 56(%ebx),%edi
movl %ebp,16(%eax)
movl %ecx,32(%eax)
movl %esi,36(%eax)
movl %edx,48(%eax)
movl %edi,56(%eax)
movl 4(%esp),%ebp
movl 8(%esp),%ecx
movl 12(%esp),%esi
movl 20(%esp),%edx
movl 24(%esp),%edi
addl $857760878,%ebp
addl $2036477234,%ecx
addl $1797285236,%esi
addl 84(%esp),%edx
addl 88(%esp),%edi
xorl 4(%ebx),%ebp
xorl 8(%ebx),%ecx
xorl 12(%ebx),%esi
xorl 20(%ebx),%edx
xorl 24(%ebx),%edi
movl %ebp,4(%eax)
movl %ecx,8(%eax)
movl %esi,12(%eax)
movl %edx,20(%eax)
movl %edi,24(%eax)
movl 28(%esp),%ebp
movl 40(%esp),%ecx
movl 44(%esp),%esi
movl 52(%esp),%edx
movl 60(%esp),%edi
addl 92(%esp),%ebp
addl 104(%esp),%ecx
addl 108(%esp),%esi
addl 116(%esp),%edx
addl 124(%esp),%edi
xorl 28(%ebx),%ebp
xorl 40(%ebx),%ecx
xorl 44(%ebx),%esi
xorl 52(%ebx),%edx
xorl 60(%ebx),%edi
leal 64(%ebx),%ebx
movl %ebp,28(%eax)
movl (%esp),%ebp
movl %ecx,40(%eax)
movl 160(%esp),%ecx
movl %esi,44(%eax)
movl %edx,52(%eax)
movl %edi,60(%eax)
movl %ebp,(%eax)
leal 64(%eax),%eax
subl $64,%ecx
jnz .L003outer_loop
jmp .L006done
.L005tail:
addl 112(%esp),%edx
addl 120(%esp),%edi
movl %eax,(%esp)
movl %ebp,16(%esp)
movl %ecx,32(%esp)
movl %esi,36(%esp)
movl %edx,48(%esp)
movl %edi,56(%esp)
movl 4(%esp),%ebp
movl 8(%esp),%ecx
movl 12(%esp),%esi
movl 20(%esp),%edx
movl 24(%esp),%edi
addl $857760878,%ebp
addl $2036477234,%ecx
addl $1797285236,%esi
addl 84(%esp),%edx
addl 88(%esp),%edi
movl %ebp,4(%esp)
movl %ecx,8(%esp)
movl %esi,12(%esp)
movl %edx,20(%esp)
movl %edi,24(%esp)
movl 28(%esp),%ebp
movl 40(%esp),%ecx
movl 44(%esp),%esi
movl 52(%esp),%edx
movl 60(%esp),%edi
addl 92(%esp),%ebp
addl 104(%esp),%ecx
addl 108(%esp),%esi
addl 116(%esp),%edx
addl 124(%esp),%edi
movl %ebp,28(%esp)
movl 156(%esp),%ebp
movl %ecx,40(%esp)
movl 152(%esp),%ecx
movl %esi,44(%esp)
xorl %esi,%esi
movl %edx,52(%esp)
movl %edi,60(%esp)
xorl %eax,%eax
xorl %edx,%edx
.L007tail_loop:
movb (%esi,%ebp,1),%al
movb (%esp,%esi,1),%dl
leal 1(%esi),%esi
xorb %dl,%al
movb %al,-1(%ecx,%esi,1)
decl %ebx
jnz .L007tail_loop
.L006done:
addl $132,%esp
.L000no_data:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
.globl ChaCha20_ssse3
.type ChaCha20_ssse3,@function
.align 16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
.Lssse3_shortcut:
testl $2048,4(%ebp)
jnz .Lxop_shortcut
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%ecx
movl 32(%esp),%edx
movl 36(%esp),%ebx
movl %esp,%ebp
subl $524,%esp
andl $-64,%esp
movl %ebp,512(%esp)
leal .Lssse3_data-.Lpic_point(%eax),%eax
movdqu (%ebx),%xmm3
.L0081x:
movdqa 32(%eax),%xmm0
movdqu (%edx),%xmm1
movdqu 16(%edx),%xmm2
movdqa (%eax),%xmm6
movdqa 16(%eax),%xmm7
movl %ebp,48(%esp)
movdqa %xmm0,(%esp)
movdqa %xmm1,16(%esp)
movdqa %xmm2,32(%esp)
movdqa %xmm3,48(%esp)
movl $10,%edx
jmp .L009loop1x
.align 16
.L010outer1x:
movdqa 80(%eax),%xmm3
movdqa (%esp),%xmm0
movdqa 16(%esp),%xmm1
movdqa 32(%esp),%xmm2
paddd 48(%esp),%xmm3
movl $10,%edx
movdqa %xmm3,48(%esp)
jmp .L009loop1x
.align 16
.L009loop1x:
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,222
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $20,%xmm1
pslld $12,%xmm4
por %xmm4,%xmm1
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,223
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $25,%xmm1
pslld $7,%xmm4
por %xmm4,%xmm1
pshufd $78,%xmm2,%xmm2
pshufd $57,%xmm1,%xmm1
pshufd $147,%xmm3,%xmm3
nop
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,222
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $20,%xmm1
pslld $12,%xmm4
por %xmm4,%xmm1
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,223
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $25,%xmm1
pslld $7,%xmm4
por %xmm4,%xmm1
pshufd $78,%xmm2,%xmm2
pshufd $147,%xmm1,%xmm1
pshufd $57,%xmm3,%xmm3
decl %edx
jnz .L009loop1x
paddd (%esp),%xmm0
paddd 16(%esp),%xmm1
paddd 32(%esp),%xmm2
paddd 48(%esp),%xmm3
cmpl $64,%ecx
jb .L011tail
movdqu (%esi),%xmm4
movdqu 16(%esi),%xmm5
pxor %xmm4,%xmm0
movdqu 32(%esi),%xmm4
pxor %xmm5,%xmm1
movdqu 48(%esi),%xmm5
pxor %xmm4,%xmm2
pxor %xmm5,%xmm3
leal 64(%esi),%esi
movdqu %xmm0,(%edi)
movdqu %xmm1,16(%edi)
movdqu %xmm2,32(%edi)
movdqu %xmm3,48(%edi)
leal 64(%edi),%edi
subl $64,%ecx
jnz .L010outer1x
jmp .L012done
.L011tail:
movdqa %xmm0,(%esp)
movdqa %xmm1,16(%esp)
movdqa %xmm2,32(%esp)
movdqa %xmm3,48(%esp)
xorl %eax,%eax
xorl %edx,%edx
xorl %ebp,%ebp
.L013tail_loop:
movb (%esp,%ebp,1),%al
movb (%esi,%ebp,1),%dl
leal 1(%ebp),%ebp
xorb %dl,%al
movb %al,-1(%edi,%ebp,1)
decl %ecx
jnz .L013tail_loop
.L012done:
movl 512(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
.align 64
.Lssse3_data:
.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long 1634760805,857760878,2036477234,1797285236
.long 0,1,2,3
.long 4,4,4,4
.long 1,0,0,0
.long 4,0,0,0
.long 0,-1,-1,-1
.align 64
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
.globl ChaCha20_xop
.type ChaCha20_xop,@function
.align 16
ChaCha20_xop:
.L_ChaCha20_xop_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
.Lxop_shortcut:
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%ecx
movl 32(%esp),%edx
movl 36(%esp),%ebx
vzeroupper
movl %esp,%ebp
subl $524,%esp
andl $-64,%esp
movl %ebp,512(%esp)
leal .Lssse3_data-.Lpic_point(%eax),%eax
vmovdqu (%ebx),%xmm3
cmpl $256,%ecx
jb .L0141x
movl %edx,516(%esp)
movl %ebx,520(%esp)
subl $256,%ecx
leal 384(%esp),%ebp
vmovdqu (%edx),%xmm7
vpshufd $0,%xmm3,%xmm0
vpshufd $85,%xmm3,%xmm1
vpshufd $170,%xmm3,%xmm2
vpshufd $255,%xmm3,%xmm3
vpaddd 48(%eax),%xmm0,%xmm0
vpshufd $0,%xmm7,%xmm4
vpshufd $85,%xmm7,%xmm5
vpsubd 64(%eax),%xmm0,%xmm0
vpshufd $170,%xmm7,%xmm6
vpshufd $255,%xmm7,%xmm7
vmovdqa %xmm0,64(%ebp)
vmovdqa %xmm1,80(%ebp)
vmovdqa %xmm2,96(%ebp)
vmovdqa %xmm3,112(%ebp)
vmovdqu 16(%edx),%xmm3
vmovdqa %xmm4,-64(%ebp)
vmovdqa %xmm5,-48(%ebp)
vmovdqa %xmm6,-32(%ebp)
vmovdqa %xmm7,-16(%ebp)
vmovdqa 32(%eax),%xmm7
leal 128(%esp),%ebx
vpshufd $0,%xmm3,%xmm0
vpshufd $85,%xmm3,%xmm1
vpshufd $170,%xmm3,%xmm2
vpshufd $255,%xmm3,%xmm3
vpshufd $0,%xmm7,%xmm4
vpshufd $85,%xmm7,%xmm5
vpshufd $170,%xmm7,%xmm6
vpshufd $255,%xmm7,%xmm7
vmovdqa %xmm0,(%ebp)
vmovdqa %xmm1,16(%ebp)
vmovdqa %xmm2,32(%ebp)
vmovdqa %xmm3,48(%ebp)
vmovdqa %xmm4,-128(%ebp)
vmovdqa %xmm5,-112(%ebp)
vmovdqa %xmm6,-96(%ebp)
vmovdqa %xmm7,-80(%ebp)
leal 128(%esi),%esi
leal 128(%edi),%edi
jmp .L015outer_loop
.align 32
.L015outer_loop:
vmovdqa -112(%ebp),%xmm1
vmovdqa -96(%ebp),%xmm2
vmovdqa -80(%ebp),%xmm3
vmovdqa -48(%ebp),%xmm5
vmovdqa -32(%ebp),%xmm6
vmovdqa -16(%ebp),%xmm7
vmovdqa %xmm1,-112(%ebx)
vmovdqa %xmm2,-96(%ebx)
vmovdqa %xmm3,-80(%ebx)
vmovdqa %xmm5,-48(%ebx)
vmovdqa %xmm6,-32(%ebx)
vmovdqa %xmm7,-16(%ebx)
vmovdqa 32(%ebp),%xmm2
vmovdqa 48(%ebp),%xmm3
vmovdqa 64(%ebp),%xmm4
vmovdqa 80(%ebp),%xmm5
vmovdqa 96(%ebp),%xmm6
vmovdqa 112(%ebp),%xmm7
vpaddd 64(%eax),%xmm4,%xmm4
vmovdqa %xmm2,32(%ebx)
vmovdqa %xmm3,48(%ebx)
vmovdqa %xmm4,64(%ebx)
vmovdqa %xmm5,80(%ebx)
vmovdqa %xmm6,96(%ebx)
vmovdqa %xmm7,112(%ebx)
vmovdqa %xmm4,64(%ebp)
vmovdqa -128(%ebp),%xmm0
vmovdqa %xmm4,%xmm6
vmovdqa -64(%ebp),%xmm3
vmovdqa (%ebp),%xmm4
vmovdqa 16(%ebp),%xmm5
movl $10,%edx
nop
.align 32
.L016loop:
vpaddd %xmm3,%xmm0,%xmm0
vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,246,16
vpaddd %xmm6,%xmm4,%xmm4
vpxor %xmm4,%xmm3,%xmm2
vmovdqa -112(%ebx),%xmm1
.byte 143,232,120,194,210,12
vmovdqa -48(%ebx),%xmm3
vpaddd %xmm2,%xmm0,%xmm0
vmovdqa 80(%ebx),%xmm7
vpxor %xmm0,%xmm6,%xmm6
vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
vmovdqa %xmm0,-128(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa %xmm6,64(%ebx)
vpxor %xmm4,%xmm2,%xmm2
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
vmovdqa %xmm4,(%ebx)
.byte 143,232,120,194,255,16
vmovdqa %xmm2,-64(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa 32(%ebx),%xmm4
vpxor %xmm5,%xmm3,%xmm3
vmovdqa -96(%ebx),%xmm0
.byte 143,232,120,194,219,12
vmovdqa -32(%ebx),%xmm2
vpaddd %xmm3,%xmm1,%xmm1
vmovdqa 96(%ebx),%xmm6
vpxor %xmm1,%xmm7,%xmm7
vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
vmovdqa %xmm1,-112(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa %xmm7,80(%ebx)
vpxor %xmm5,%xmm3,%xmm3
vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,219,7
vmovdqa %xmm5,16(%ebx)
.byte 143,232,120,194,246,16
vmovdqa %xmm3,-48(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa 48(%ebx),%xmm5
vpxor %xmm4,%xmm2,%xmm2
vmovdqa -80(%ebx),%xmm1
.byte 143,232,120,194,210,12
vmovdqa -16(%ebx),%xmm3
vpaddd %xmm2,%xmm0,%xmm0
vmovdqa 112(%ebx),%xmm7
vpxor %xmm0,%xmm6,%xmm6
vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
vmovdqa %xmm0,-96(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa %xmm6,96(%ebx)
vpxor %xmm4,%xmm2,%xmm2
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
.byte 143,232,120,194,255,16
vmovdqa %xmm2,-32(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vpxor %xmm5,%xmm3,%xmm3
vmovdqa -128(%ebx),%xmm0
.byte 143,232,120,194,219,12
vmovdqa -48(%ebx),%xmm2
vpaddd %xmm3,%xmm1,%xmm1
vpxor %xmm1,%xmm7,%xmm7
vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
vmovdqa %xmm1,-80(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vpxor %xmm5,%xmm3,%xmm3
vpxor %xmm0,%xmm7,%xmm6
.byte 143,232,120,194,219,7
.byte 143,232,120,194,246,16
vmovdqa %xmm3,-16(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vpxor %xmm4,%xmm2,%xmm2
vmovdqa -112(%ebx),%xmm1
.byte 143,232,120,194,210,12
vmovdqa -32(%ebx),%xmm3
vpaddd %xmm2,%xmm0,%xmm0
vmovdqa 64(%ebx),%xmm7
vpxor %xmm0,%xmm6,%xmm6
vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
vmovdqa %xmm0,-128(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa %xmm6,112(%ebx)
vpxor %xmm4,%xmm2,%xmm2
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
vmovdqa %xmm4,32(%ebx)
.byte 143,232,120,194,255,16
vmovdqa %xmm2,-48(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa (%ebx),%xmm4
vpxor %xmm5,%xmm3,%xmm3
vmovdqa -96(%ebx),%xmm0
.byte 143,232,120,194,219,12
vmovdqa -16(%ebx),%xmm2
vpaddd %xmm3,%xmm1,%xmm1
vmovdqa 80(%ebx),%xmm6
vpxor %xmm1,%xmm7,%xmm7
vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
vmovdqa %xmm1,-112(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa %xmm7,64(%ebx)
vpxor %xmm5,%xmm3,%xmm3
vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,219,7
vmovdqa %xmm5,48(%ebx)
.byte 143,232,120,194,246,16
vmovdqa %xmm3,-32(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa 16(%ebx),%xmm5
vpxor %xmm4,%xmm2,%xmm2
vmovdqa -80(%ebx),%xmm1
.byte 143,232,120,194,210,12
vmovdqa -64(%ebx),%xmm3
vpaddd %xmm2,%xmm0,%xmm0
vmovdqa 96(%ebx),%xmm7
vpxor %xmm0,%xmm6,%xmm6
vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
vmovdqa %xmm0,-96(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa %xmm6,80(%ebx)
vpxor %xmm4,%xmm2,%xmm2
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
.byte 143,232,120,194,255,16
vmovdqa %xmm2,-16(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vpxor %xmm5,%xmm3,%xmm3
vmovdqa -128(%ebx),%xmm0
.byte 143,232,120,194,219,12
vpaddd %xmm3,%xmm1,%xmm1
vmovdqa 64(%ebx),%xmm6
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,255,8
vmovdqa %xmm1,-80(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa %xmm7,96(%ebx)
vpxor %xmm5,%xmm3,%xmm3
.byte 143,232,120,194,219,7
decl %edx
jnz .L016loop
vmovdqa %xmm3,-64(%ebx)
vmovdqa %xmm4,(%ebx)
vmovdqa %xmm5,16(%ebx)
vmovdqa %xmm6,64(%ebx)
vmovdqa %xmm7,96(%ebx)
vmovdqa -112(%ebx),%xmm1
vmovdqa -96(%ebx),%xmm2
vmovdqa -80(%ebx),%xmm3
vpaddd -128(%ebp),%xmm0,%xmm0
vpaddd -112(%ebp),%xmm1,%xmm1
vpaddd -96(%ebp),%xmm2,%xmm2
vpaddd -80(%ebp),%xmm3,%xmm3
vpunpckldq %xmm1,%xmm0,%xmm6
vpunpckldq %xmm3,%xmm2,%xmm7
vpunpckhdq %xmm1,%xmm0,%xmm0
vpunpckhdq %xmm3,%xmm2,%xmm2
vpunpcklqdq %xmm7,%xmm6,%xmm1
vpunpckhqdq %xmm7,%xmm6,%xmm6
vpunpcklqdq %xmm2,%xmm0,%xmm7
vpunpckhqdq %xmm2,%xmm0,%xmm3
vpxor -128(%esi),%xmm1,%xmm4
vpxor -64(%esi),%xmm6,%xmm5
vpxor (%esi),%xmm7,%xmm6
vpxor 64(%esi),%xmm3,%xmm7
leal 16(%esi),%esi
vmovdqa -64(%ebx),%xmm0
vmovdqa -48(%ebx),%xmm1
vmovdqa -32(%ebx),%xmm2
vmovdqa -16(%ebx),%xmm3
vmovdqu %xmm4,-128(%edi)
vmovdqu %xmm5,-64(%edi)
vmovdqu %xmm6,(%edi)
vmovdqu %xmm7,64(%edi)
leal 16(%edi),%edi
vpaddd -64(%ebp),%xmm0,%xmm0
vpaddd -48(%ebp),%xmm1,%xmm1
vpaddd -32(%ebp),%xmm2,%xmm2
vpaddd -16(%ebp),%xmm3,%xmm3
vpunpckldq %xmm1,%xmm0,%xmm6
vpunpckldq %xmm3,%xmm2,%xmm7
vpunpckhdq %xmm1,%xmm0,%xmm0
vpunpckhdq %xmm3,%xmm2,%xmm2
vpunpcklqdq %xmm7,%xmm6,%xmm1
vpunpckhqdq %xmm7,%xmm6,%xmm6
vpunpcklqdq %xmm2,%xmm0,%xmm7
vpunpckhqdq %xmm2,%xmm0,%xmm3
vpxor -128(%esi),%xmm1,%xmm4
vpxor -64(%esi),%xmm6,%xmm5
vpxor (%esi),%xmm7,%xmm6
vpxor 64(%esi),%xmm3,%xmm7
leal 16(%esi),%esi
vmovdqa (%ebx),%xmm0
vmovdqa 16(%ebx),%xmm1
vmovdqa 32(%ebx),%xmm2
vmovdqa 48(%ebx),%xmm3
vmovdqu %xmm4,-128(%edi)
vmovdqu %xmm5,-64(%edi)
vmovdqu %xmm6,(%edi)
vmovdqu %xmm7,64(%edi)
leal 16(%edi),%edi
vpaddd (%ebp),%xmm0,%xmm0
vpaddd 16(%ebp),%xmm1,%xmm1
vpaddd 32(%ebp),%xmm2,%xmm2
vpaddd 48(%ebp),%xmm3,%xmm3
vpunpckldq %xmm1,%xmm0,%xmm6
vpunpckldq %xmm3,%xmm2,%xmm7
vpunpckhdq %xmm1,%xmm0,%xmm0
vpunpckhdq %xmm3,%xmm2,%xmm2
vpunpcklqdq %xmm7,%xmm6,%xmm1
vpunpckhqdq %xmm7,%xmm6,%xmm6
vpunpcklqdq %xmm2,%xmm0,%xmm7
vpunpckhqdq %xmm2,%xmm0,%xmm3
vpxor -128(%esi),%xmm1,%xmm4
vpxor -64(%esi),%xmm6,%xmm5
vpxor (%esi),%xmm7,%xmm6
vpxor 64(%esi),%xmm3,%xmm7
leal 16(%esi),%esi
vmovdqa 64(%ebx),%xmm0
vmovdqa 80(%ebx),%xmm1
vmovdqa 96(%ebx),%xmm2
vmovdqa 112(%ebx),%xmm3
vmovdqu %xmm4,-128(%edi)
vmovdqu %xmm5,-64(%edi)
vmovdqu %xmm6,(%edi)
vmovdqu %xmm7,64(%edi)
leal 16(%edi),%edi
vpaddd 64(%ebp),%xmm0,%xmm0
vpaddd 80(%ebp),%xmm1,%xmm1
vpaddd 96(%ebp),%xmm2,%xmm2
vpaddd 112(%ebp),%xmm3,%xmm3
vpunpckldq %xmm1,%xmm0,%xmm6
vpunpckldq %xmm3,%xmm2,%xmm7
vpunpckhdq %xmm1,%xmm0,%xmm0
vpunpckhdq %xmm3,%xmm2,%xmm2
vpunpcklqdq %xmm7,%xmm6,%xmm1
vpunpckhqdq %xmm7,%xmm6,%xmm6
vpunpcklqdq %xmm2,%xmm0,%xmm7
vpunpckhqdq %xmm2,%xmm0,%xmm3
vpxor -128(%esi),%xmm1,%xmm4
vpxor -64(%esi),%xmm6,%xmm5
vpxor (%esi),%xmm7,%xmm6
vpxor 64(%esi),%xmm3,%xmm7
leal 208(%esi),%esi
vmovdqu %xmm4,-128(%edi)
vmovdqu %xmm5,-64(%edi)
vmovdqu %xmm6,(%edi)
vmovdqu %xmm7,64(%edi)
leal 208(%edi),%edi
subl $256,%ecx
jnc .L015outer_loop
addl $256,%ecx
jz .L017done
movl 520(%esp),%ebx
leal -128(%esi),%esi
movl 516(%esp),%edx
leal -128(%edi),%edi
vmovd 64(%ebp),%xmm2
vmovdqu (%ebx),%xmm3
vpaddd 96(%eax),%xmm2,%xmm2
vpand 112(%eax),%xmm3,%xmm3
vpor %xmm2,%xmm3,%xmm3
.L0141x:
vmovdqa 32(%eax),%xmm0
vmovdqu (%edx),%xmm1
vmovdqu 16(%edx),%xmm2
vmovdqa (%eax),%xmm6
vmovdqa 16(%eax),%xmm7
movl %ebp,48(%esp)
vmovdqa %xmm0,(%esp)
vmovdqa %xmm1,16(%esp)
vmovdqa %xmm2,32(%esp)
vmovdqa %xmm3,48(%esp)
movl $10,%edx
jmp .L018loop1x
.align 16
.L019outer1x:
vmovdqa 80(%eax),%xmm3
vmovdqa (%esp),%xmm0
vmovdqa 16(%esp),%xmm1
vmovdqa 32(%esp),%xmm2
vpaddd 48(%esp),%xmm3,%xmm3
movl $10,%edx
vmovdqa %xmm3,48(%esp)
jmp .L018loop1x
.align 16
.L018loop1x:
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,16
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,12
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,8
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,7
vpshufd $78,%xmm2,%xmm2
vpshufd $57,%xmm1,%xmm1
vpshufd $147,%xmm3,%xmm3
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,16
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,12
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,8
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,7
vpshufd $78,%xmm2,%xmm2
vpshufd $147,%xmm1,%xmm1
vpshufd $57,%xmm3,%xmm3
decl %edx
jnz .L018loop1x
vpaddd (%esp),%xmm0,%xmm0
vpaddd 16(%esp),%xmm1,%xmm1
vpaddd 32(%esp),%xmm2,%xmm2
vpaddd 48(%esp),%xmm3,%xmm3
cmpl $64,%ecx
jb .L020tail
vpxor (%esi),%xmm0,%xmm0
vpxor 16(%esi),%xmm1,%xmm1
vpxor 32(%esi),%xmm2,%xmm2
vpxor 48(%esi),%xmm3,%xmm3
leal 64(%esi),%esi
vmovdqu %xmm0,(%edi)
vmovdqu %xmm1,16(%edi)
vmovdqu %xmm2,32(%edi)
vmovdqu %xmm3,48(%edi)
leal 64(%edi),%edi
subl $64,%ecx
jnz .L019outer1x
jmp .L017done
.L020tail:
vmovdqa %xmm0,(%esp)
vmovdqa %xmm1,16(%esp)
vmovdqa %xmm2,32(%esp)
vmovdqa %xmm3,48(%esp)
xorl %eax,%eax
xorl %edx,%edx
xorl %ebp,%ebp
.L021tail_loop:
movb (%esp,%ebp,1),%al
movb (%esi,%ebp,1),%dl
leal 1(%ebp),%ebp
xorb %dl,%al
movb %al,-1(%edi,%ebp,1)
decl %ecx
jnz .L021tail_loop
.L017done:
vzeroupper
movl 512(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size ChaCha20_xop,.-.L_ChaCha20_xop_begin
.comm OPENSSL_ia32cap_P,16,4
#endif