Regen assembly files for i386 after r338846.

Jung-uk Kim 2018-09-20 22:48:34 +00:00
parent 4cd58f1ace
commit 63ffbd00fc
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/projects/openssl111/; revision=338847
11 changed files with 25357 additions and 1801 deletions
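These are generated files, not hand edits: each .S below is emitted by the matching OpenSSL perlasm script. As a rough sketch of the regeneration step (script path and flags are illustrative assumptions, not taken from this commit), the ECC file would come from something like "perl crypto/ec/asm/ecp_nistz256-x86.pl elf -fPIC ecp_nistz256-x86.S", run once with -fPIC and once without, the two outputs joined by the preprocessor conditional whose #else/#endif lines are visible in the diffs below.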

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -2746,6 +2746,8 @@ ecp_nistz256_to_mont:
call _picup_eax
.L000pic:
leal .LRR-.L000pic(%eax),%ebp
leal OPENSSL_ia32cap_P-.L000pic(%eax),%eax
movl (%eax),%eax
movl 20(%esp),%edi
call _ecp_nistz256_mul_mont
popl %edi
@ -2767,6 +2769,8 @@ ecp_nistz256_from_mont:
call _picup_eax
.L001pic:
leal .LONE-.L001pic(%eax),%ebp
leal OPENSSL_ia32cap_P-.L001pic(%eax),%eax
movl (%eax),%eax
movl 20(%esp),%edi
call _ecp_nistz256_mul_mont
popl %edi
@ -2786,6 +2790,10 @@ ecp_nistz256_mul_mont:
pushl %edi
movl 24(%esp),%esi
movl 28(%esp),%ebp
call _picup_eax
.L002pic:
leal OPENSSL_ia32cap_P-.L002pic(%eax),%eax
movl (%eax),%eax
movl 20(%esp),%edi
call _ecp_nistz256_mul_mont
popl %edi
@ -2804,6 +2812,10 @@ ecp_nistz256_sqr_mont:
pushl %esi
pushl %edi
movl 24(%esp),%esi
call _picup_eax
.L003pic:
leal OPENSSL_ia32cap_P-.L003pic(%eax),%eax
movl (%eax),%eax
movl 20(%esp),%edi
movl %esi,%ebp
call _ecp_nistz256_mul_mont
@ -2816,6 +2828,265 @@ ecp_nistz256_sqr_mont:
.type _ecp_nistz256_mul_mont,@function
.align 16
_ecp_nistz256_mul_mont:
andl $83886080,%eax
cmpl $83886080,%eax
jne .L004mul_mont_ialu
movl %esp,%edx
subl $256,%esp
movd (%ebp),%xmm7
leal 4(%ebp),%ebp
pcmpeqd %xmm6,%xmm6
psrlq $48,%xmm6
pshuflw $220,%xmm7,%xmm7
andl $-64,%esp
pshufd $220,%xmm7,%xmm7
leal 128(%esp),%ebx
movd (%esi),%xmm0
pshufd $204,%xmm0,%xmm0
movd 4(%esi),%xmm1
movdqa %xmm0,(%ebx)
pmuludq %xmm7,%xmm0
movd 8(%esi),%xmm2
pshufd $204,%xmm1,%xmm1
movdqa %xmm1,16(%ebx)
pmuludq %xmm7,%xmm1
movq %xmm0,%xmm4
pslldq $6,%xmm4
paddq %xmm0,%xmm4
movdqa %xmm4,%xmm5
psrldq $10,%xmm4
pand %xmm6,%xmm5
movd 12(%esi),%xmm3
pshufd $204,%xmm2,%xmm2
movdqa %xmm2,32(%ebx)
pmuludq %xmm7,%xmm2
paddq %xmm4,%xmm1
movdqa %xmm1,(%esp)
movd 16(%esi),%xmm0
pshufd $204,%xmm3,%xmm3
movdqa %xmm3,48(%ebx)
pmuludq %xmm7,%xmm3
movdqa %xmm2,16(%esp)
movd 20(%esi),%xmm1
pshufd $204,%xmm0,%xmm0
movdqa %xmm0,64(%ebx)
pmuludq %xmm7,%xmm0
paddq %xmm5,%xmm3
movdqa %xmm3,32(%esp)
movd 24(%esi),%xmm2
pshufd $204,%xmm1,%xmm1
movdqa %xmm1,80(%ebx)
pmuludq %xmm7,%xmm1
movdqa %xmm0,48(%esp)
pshufd $177,%xmm5,%xmm4
movd 28(%esi),%xmm3
pshufd $204,%xmm2,%xmm2
movdqa %xmm2,96(%ebx)
pmuludq %xmm7,%xmm2
movdqa %xmm1,64(%esp)
psubq %xmm5,%xmm4
movd (%ebp),%xmm0
pshufd $204,%xmm3,%xmm3
movdqa %xmm3,112(%ebx)
pmuludq %xmm7,%xmm3
pshuflw $220,%xmm0,%xmm7
movdqa (%ebx),%xmm0
pshufd $220,%xmm7,%xmm7
movl $6,%ecx
leal 4(%ebp),%ebp
jmp .L005madd_sse2
.align 16
.L005madd_sse2:
paddq %xmm5,%xmm2
paddq %xmm4,%xmm3
movdqa 16(%ebx),%xmm1
pmuludq %xmm7,%xmm0
movdqa %xmm2,80(%esp)
movdqa 32(%ebx),%xmm2
pmuludq %xmm7,%xmm1
movdqa %xmm3,96(%esp)
paddq (%esp),%xmm0
movdqa 48(%ebx),%xmm3
pmuludq %xmm7,%xmm2
movq %xmm0,%xmm4
pslldq $6,%xmm4
paddq 16(%esp),%xmm1
paddq %xmm0,%xmm4
movdqa %xmm4,%xmm5
psrldq $10,%xmm4
movdqa 64(%ebx),%xmm0
pmuludq %xmm7,%xmm3
paddq %xmm4,%xmm1
paddq 32(%esp),%xmm2
movdqa %xmm1,(%esp)
movdqa 80(%ebx),%xmm1
pmuludq %xmm7,%xmm0
paddq 48(%esp),%xmm3
movdqa %xmm2,16(%esp)
pand %xmm6,%xmm5
movdqa 96(%ebx),%xmm2
pmuludq %xmm7,%xmm1
paddq %xmm5,%xmm3
paddq 64(%esp),%xmm0
movdqa %xmm3,32(%esp)
pshufd $177,%xmm5,%xmm4
movdqa %xmm7,%xmm3
pmuludq %xmm7,%xmm2
movd (%ebp),%xmm7
leal 4(%ebp),%ebp
paddq 80(%esp),%xmm1
psubq %xmm5,%xmm4
movdqa %xmm0,48(%esp)
pshuflw $220,%xmm7,%xmm7
pmuludq 112(%ebx),%xmm3
pshufd $220,%xmm7,%xmm7
movdqa (%ebx),%xmm0
movdqa %xmm1,64(%esp)
paddq 96(%esp),%xmm2
decl %ecx
jnz .L005madd_sse2
paddq %xmm5,%xmm2
paddq %xmm4,%xmm3
movdqa 16(%ebx),%xmm1
pmuludq %xmm7,%xmm0
movdqa %xmm2,80(%esp)
movdqa 32(%ebx),%xmm2
pmuludq %xmm7,%xmm1
movdqa %xmm3,96(%esp)
paddq (%esp),%xmm0
movdqa 48(%ebx),%xmm3
pmuludq %xmm7,%xmm2
movq %xmm0,%xmm4
pslldq $6,%xmm4
paddq 16(%esp),%xmm1
paddq %xmm0,%xmm4
movdqa %xmm4,%xmm5
psrldq $10,%xmm4
movdqa 64(%ebx),%xmm0
pmuludq %xmm7,%xmm3
paddq %xmm4,%xmm1
paddq 32(%esp),%xmm2
movdqa %xmm1,(%esp)
movdqa 80(%ebx),%xmm1
pmuludq %xmm7,%xmm0
paddq 48(%esp),%xmm3
movdqa %xmm2,16(%esp)
pand %xmm6,%xmm5
movdqa 96(%ebx),%xmm2
pmuludq %xmm7,%xmm1
paddq %xmm5,%xmm3
paddq 64(%esp),%xmm0
movdqa %xmm3,32(%esp)
pshufd $177,%xmm5,%xmm4
movdqa 112(%ebx),%xmm3
pmuludq %xmm7,%xmm2
paddq 80(%esp),%xmm1
psubq %xmm5,%xmm4
movdqa %xmm0,48(%esp)
pmuludq %xmm7,%xmm3
pcmpeqd %xmm7,%xmm7
movdqa (%esp),%xmm0
pslldq $8,%xmm7
movdqa %xmm1,64(%esp)
paddq 96(%esp),%xmm2
paddq %xmm5,%xmm2
paddq %xmm4,%xmm3
movdqa %xmm2,80(%esp)
movdqa %xmm3,96(%esp)
movdqa 16(%esp),%xmm1
movdqa 32(%esp),%xmm2
movdqa 48(%esp),%xmm3
movq %xmm0,%xmm4
pand %xmm7,%xmm0
xorl %ebp,%ebp
pslldq $6,%xmm4
movq %xmm1,%xmm5
paddq %xmm4,%xmm0
pand %xmm7,%xmm1
psrldq $6,%xmm0
movd %xmm0,%eax
psrldq $4,%xmm0
paddq %xmm0,%xmm5
movdqa 64(%esp),%xmm0
subl $-1,%eax
pslldq $6,%xmm5
movq %xmm2,%xmm4
paddq %xmm5,%xmm1
pand %xmm7,%xmm2
psrldq $6,%xmm1
movl %eax,(%edi)
movd %xmm1,%eax
psrldq $4,%xmm1
paddq %xmm1,%xmm4
movdqa 80(%esp),%xmm1
sbbl $-1,%eax
pslldq $6,%xmm4
movq %xmm3,%xmm5
paddq %xmm4,%xmm2
pand %xmm7,%xmm3
psrldq $6,%xmm2
movl %eax,4(%edi)
movd %xmm2,%eax
psrldq $4,%xmm2
paddq %xmm2,%xmm5
movdqa 96(%esp),%xmm2
sbbl $-1,%eax
pslldq $6,%xmm5
movq %xmm0,%xmm4
paddq %xmm5,%xmm3
pand %xmm7,%xmm0
psrldq $6,%xmm3
movl %eax,8(%edi)
movd %xmm3,%eax
psrldq $4,%xmm3
paddq %xmm3,%xmm4
sbbl $0,%eax
pslldq $6,%xmm4
movq %xmm1,%xmm5
paddq %xmm4,%xmm0
pand %xmm7,%xmm1
psrldq $6,%xmm0
movl %eax,12(%edi)
movd %xmm0,%eax
psrldq $4,%xmm0
paddq %xmm0,%xmm5
sbbl $0,%eax
pslldq $6,%xmm5
movq %xmm2,%xmm4
paddq %xmm5,%xmm1
pand %xmm7,%xmm2
psrldq $6,%xmm1
movd %xmm1,%ebx
psrldq $4,%xmm1
movl %edx,%esp
paddq %xmm1,%xmm4
pslldq $6,%xmm4
paddq %xmm4,%xmm2
psrldq $6,%xmm2
movd %xmm2,%ecx
psrldq $4,%xmm2
sbbl $0,%ebx
movd %xmm2,%edx
pextrw $2,%xmm2,%esi
sbbl $1,%ecx
sbbl $-1,%edx
sbbl $0,%esi
subl %esi,%ebp
addl %esi,(%edi)
adcl %esi,4(%edi)
adcl %esi,8(%edi)
adcl $0,12(%edi)
adcl $0,%eax
adcl $0,%ebx
movl %eax,16(%edi)
adcl %ebp,%ecx
movl %ebx,20(%edi)
adcl %esi,%edx
movl %ecx,24(%edi)
movl %edx,28(%edi)
ret
.align 16
.L004mul_mont_ialu:
subl $40,%esp
movl (%esi),%eax
movl (%ebp),%ebx
@ -3463,7 +3734,7 @@ ecp_nistz256_scatter_w5:
movl 28(%esp),%ebp
leal 124(%edi,%ebp,4),%edi
movl $6,%ebp
.L002scatter_w5_loop:
.L006scatter_w5_loop:
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
@ -3475,7 +3746,7 @@ ecp_nistz256_scatter_w5:
movl %edx,64(%edi)
leal 256(%edi),%edi
decl %ebp
jnz .L002scatter_w5_loop
jnz .L006scatter_w5_loop
popl %edi
popl %esi
popl %ebx
@ -3590,7 +3861,7 @@ ecp_nistz256_scatter_w7:
movl 28(%esp),%ebp
leal (%edi,%ebp,1),%edi
movl $16,%ebp
.L003scatter_w7_loop:
.L007scatter_w7_loop:
movl (%esi),%eax
leal 4(%esi),%esi
movb %al,(%edi)
@ -3600,7 +3871,7 @@ ecp_nistz256_scatter_w7:
movb %ah,192(%edi)
leal 256(%edi),%edi
decl %ebp
jnz .L003scatter_w7_loop
jnz .L007scatter_w7_loop
popl %edi
popl %esi
popl %ebx
@ -3832,6 +4103,10 @@ ecp_nistz256_point_double:
pushl %edi
movl 24(%esp),%esi
subl $164,%esp
call _picup_eax
.L008pic:
leal OPENSSL_ia32cap_P-.L008pic(%eax),%edx
movl (%edx),%ebp
.Lpoint_double_shortcut:
movl (%esi),%eax
movl 4(%esi),%ebx
@ -3954,6 +4229,10 @@ ecp_nistz256_point_add:
pushl %edi
movl 28(%esp),%esi
subl $596,%esp
call _picup_eax
.L009pic:
leal OPENSSL_ia32cap_P-.L009pic(%eax),%edx
movl (%edx),%ebp
leal 192(%esp),%edi
movl (%esi),%eax
movl 4(%esi),%ebx
@ -4144,26 +4423,26 @@ ecp_nistz256_point_add:
orl 8(%edi),%eax
orl 12(%edi),%eax
.byte 62
jnz .L004add_proceed
jnz .L010add_proceed
movl 576(%esp),%eax
andl 580(%esp),%eax
movl 584(%esp),%ebx
jz .L004add_proceed
jz .L010add_proceed
testl %ebx,%ebx
jz .L005add_double
jz .L011add_double
movl 616(%esp),%edi
xorl %eax,%eax
movl $24,%ecx
.byte 252,243,171
jmp .L006add_done
jmp .L012add_done
.align 16
.L005add_double:
.L011add_double:
movl 620(%esp),%esi
movl 588(%esp),%ebp
addl $432,%esp
jmp .Lpoint_double_shortcut
.align 16
.L004add_proceed:
.L010add_proceed:
movl 588(%esp),%eax
leal 352(%esp),%esi
leal 352(%esp),%ebp
@ -4448,7 +4727,7 @@ ecp_nistz256_point_add:
orl %ebx,%eax
orl %ecx,%eax
movl %eax,60(%edi)
.L006add_done:
.L012add_done:
addl $596,%esp
popl %edi
popl %esi
@ -4467,6 +4746,10 @@ ecp_nistz256_point_add_affine:
pushl %edi
movl 24(%esp),%esi
subl $492,%esp
call _picup_eax
.L013pic:
leal OPENSSL_ia32cap_P-.L013pic(%eax),%edx
movl (%edx),%ebp
leal 96(%esp),%edi
movl (%esi),%eax
movl 4(%esi),%ebx
@ -4884,6 +5167,7 @@ ecp_nistz256_point_add_affine:
popl %ebp
ret
.size ecp_nistz256_point_add_affine,.-.L_ecp_nistz256_point_add_affine_begin
.comm OPENSSL_ia32cap_P,16,4
#else
.text
.globl ecp_nistz256_precomputed
@ -7630,6 +7914,8 @@ ecp_nistz256_to_mont:
call _picup_eax
.L000pic:
leal .LRR-.L000pic(%eax),%ebp
leal OPENSSL_ia32cap_P,%eax
movl (%eax),%eax
movl 20(%esp),%edi
call _ecp_nistz256_mul_mont
popl %edi
@ -7651,6 +7937,8 @@ ecp_nistz256_from_mont:
call _picup_eax
.L001pic:
leal .LONE-.L001pic(%eax),%ebp
leal OPENSSL_ia32cap_P,%eax
movl (%eax),%eax
movl 20(%esp),%edi
call _ecp_nistz256_mul_mont
popl %edi
@ -7670,6 +7958,10 @@ ecp_nistz256_mul_mont:
pushl %edi
movl 24(%esp),%esi
movl 28(%esp),%ebp
call _picup_eax
.L002pic:
leal OPENSSL_ia32cap_P,%eax
movl (%eax),%eax
movl 20(%esp),%edi
call _ecp_nistz256_mul_mont
popl %edi
@ -7688,6 +7980,10 @@ ecp_nistz256_sqr_mont:
pushl %esi
pushl %edi
movl 24(%esp),%esi
call _picup_eax
.L003pic:
leal OPENSSL_ia32cap_P,%eax
movl (%eax),%eax
movl 20(%esp),%edi
movl %esi,%ebp
call _ecp_nistz256_mul_mont
@ -7700,6 +7996,265 @@ ecp_nistz256_sqr_mont:
.type _ecp_nistz256_mul_mont,@function
.align 16
_ecp_nistz256_mul_mont:
andl $83886080,%eax
cmpl $83886080,%eax
jne .L004mul_mont_ialu
movl %esp,%edx
subl $256,%esp
movd (%ebp),%xmm7
leal 4(%ebp),%ebp
pcmpeqd %xmm6,%xmm6
psrlq $48,%xmm6
pshuflw $220,%xmm7,%xmm7
andl $-64,%esp
pshufd $220,%xmm7,%xmm7
leal 128(%esp),%ebx
movd (%esi),%xmm0
pshufd $204,%xmm0,%xmm0
movd 4(%esi),%xmm1
movdqa %xmm0,(%ebx)
pmuludq %xmm7,%xmm0
movd 8(%esi),%xmm2
pshufd $204,%xmm1,%xmm1
movdqa %xmm1,16(%ebx)
pmuludq %xmm7,%xmm1
movq %xmm0,%xmm4
pslldq $6,%xmm4
paddq %xmm0,%xmm4
movdqa %xmm4,%xmm5
psrldq $10,%xmm4
pand %xmm6,%xmm5
movd 12(%esi),%xmm3
pshufd $204,%xmm2,%xmm2
movdqa %xmm2,32(%ebx)
pmuludq %xmm7,%xmm2
paddq %xmm4,%xmm1
movdqa %xmm1,(%esp)
movd 16(%esi),%xmm0
pshufd $204,%xmm3,%xmm3
movdqa %xmm3,48(%ebx)
pmuludq %xmm7,%xmm3
movdqa %xmm2,16(%esp)
movd 20(%esi),%xmm1
pshufd $204,%xmm0,%xmm0
movdqa %xmm0,64(%ebx)
pmuludq %xmm7,%xmm0
paddq %xmm5,%xmm3
movdqa %xmm3,32(%esp)
movd 24(%esi),%xmm2
pshufd $204,%xmm1,%xmm1
movdqa %xmm1,80(%ebx)
pmuludq %xmm7,%xmm1
movdqa %xmm0,48(%esp)
pshufd $177,%xmm5,%xmm4
movd 28(%esi),%xmm3
pshufd $204,%xmm2,%xmm2
movdqa %xmm2,96(%ebx)
pmuludq %xmm7,%xmm2
movdqa %xmm1,64(%esp)
psubq %xmm5,%xmm4
movd (%ebp),%xmm0
pshufd $204,%xmm3,%xmm3
movdqa %xmm3,112(%ebx)
pmuludq %xmm7,%xmm3
pshuflw $220,%xmm0,%xmm7
movdqa (%ebx),%xmm0
pshufd $220,%xmm7,%xmm7
movl $6,%ecx
leal 4(%ebp),%ebp
jmp .L005madd_sse2
.align 16
.L005madd_sse2:
paddq %xmm5,%xmm2
paddq %xmm4,%xmm3
movdqa 16(%ebx),%xmm1
pmuludq %xmm7,%xmm0
movdqa %xmm2,80(%esp)
movdqa 32(%ebx),%xmm2
pmuludq %xmm7,%xmm1
movdqa %xmm3,96(%esp)
paddq (%esp),%xmm0
movdqa 48(%ebx),%xmm3
pmuludq %xmm7,%xmm2
movq %xmm0,%xmm4
pslldq $6,%xmm4
paddq 16(%esp),%xmm1
paddq %xmm0,%xmm4
movdqa %xmm4,%xmm5
psrldq $10,%xmm4
movdqa 64(%ebx),%xmm0
pmuludq %xmm7,%xmm3
paddq %xmm4,%xmm1
paddq 32(%esp),%xmm2
movdqa %xmm1,(%esp)
movdqa 80(%ebx),%xmm1
pmuludq %xmm7,%xmm0
paddq 48(%esp),%xmm3
movdqa %xmm2,16(%esp)
pand %xmm6,%xmm5
movdqa 96(%ebx),%xmm2
pmuludq %xmm7,%xmm1
paddq %xmm5,%xmm3
paddq 64(%esp),%xmm0
movdqa %xmm3,32(%esp)
pshufd $177,%xmm5,%xmm4
movdqa %xmm7,%xmm3
pmuludq %xmm7,%xmm2
movd (%ebp),%xmm7
leal 4(%ebp),%ebp
paddq 80(%esp),%xmm1
psubq %xmm5,%xmm4
movdqa %xmm0,48(%esp)
pshuflw $220,%xmm7,%xmm7
pmuludq 112(%ebx),%xmm3
pshufd $220,%xmm7,%xmm7
movdqa (%ebx),%xmm0
movdqa %xmm1,64(%esp)
paddq 96(%esp),%xmm2
decl %ecx
jnz .L005madd_sse2
paddq %xmm5,%xmm2
paddq %xmm4,%xmm3
movdqa 16(%ebx),%xmm1
pmuludq %xmm7,%xmm0
movdqa %xmm2,80(%esp)
movdqa 32(%ebx),%xmm2
pmuludq %xmm7,%xmm1
movdqa %xmm3,96(%esp)
paddq (%esp),%xmm0
movdqa 48(%ebx),%xmm3
pmuludq %xmm7,%xmm2
movq %xmm0,%xmm4
pslldq $6,%xmm4
paddq 16(%esp),%xmm1
paddq %xmm0,%xmm4
movdqa %xmm4,%xmm5
psrldq $10,%xmm4
movdqa 64(%ebx),%xmm0
pmuludq %xmm7,%xmm3
paddq %xmm4,%xmm1
paddq 32(%esp),%xmm2
movdqa %xmm1,(%esp)
movdqa 80(%ebx),%xmm1
pmuludq %xmm7,%xmm0
paddq 48(%esp),%xmm3
movdqa %xmm2,16(%esp)
pand %xmm6,%xmm5
movdqa 96(%ebx),%xmm2
pmuludq %xmm7,%xmm1
paddq %xmm5,%xmm3
paddq 64(%esp),%xmm0
movdqa %xmm3,32(%esp)
pshufd $177,%xmm5,%xmm4
movdqa 112(%ebx),%xmm3
pmuludq %xmm7,%xmm2
paddq 80(%esp),%xmm1
psubq %xmm5,%xmm4
movdqa %xmm0,48(%esp)
pmuludq %xmm7,%xmm3
pcmpeqd %xmm7,%xmm7
movdqa (%esp),%xmm0
pslldq $8,%xmm7
movdqa %xmm1,64(%esp)
paddq 96(%esp),%xmm2
paddq %xmm5,%xmm2
paddq %xmm4,%xmm3
movdqa %xmm2,80(%esp)
movdqa %xmm3,96(%esp)
movdqa 16(%esp),%xmm1
movdqa 32(%esp),%xmm2
movdqa 48(%esp),%xmm3
movq %xmm0,%xmm4
pand %xmm7,%xmm0
xorl %ebp,%ebp
pslldq $6,%xmm4
movq %xmm1,%xmm5
paddq %xmm4,%xmm0
pand %xmm7,%xmm1
psrldq $6,%xmm0
movd %xmm0,%eax
psrldq $4,%xmm0
paddq %xmm0,%xmm5
movdqa 64(%esp),%xmm0
subl $-1,%eax
pslldq $6,%xmm5
movq %xmm2,%xmm4
paddq %xmm5,%xmm1
pand %xmm7,%xmm2
psrldq $6,%xmm1
movl %eax,(%edi)
movd %xmm1,%eax
psrldq $4,%xmm1
paddq %xmm1,%xmm4
movdqa 80(%esp),%xmm1
sbbl $-1,%eax
pslldq $6,%xmm4
movq %xmm3,%xmm5
paddq %xmm4,%xmm2
pand %xmm7,%xmm3
psrldq $6,%xmm2
movl %eax,4(%edi)
movd %xmm2,%eax
psrldq $4,%xmm2
paddq %xmm2,%xmm5
movdqa 96(%esp),%xmm2
sbbl $-1,%eax
pslldq $6,%xmm5
movq %xmm0,%xmm4
paddq %xmm5,%xmm3
pand %xmm7,%xmm0
psrldq $6,%xmm3
movl %eax,8(%edi)
movd %xmm3,%eax
psrldq $4,%xmm3
paddq %xmm3,%xmm4
sbbl $0,%eax
pslldq $6,%xmm4
movq %xmm1,%xmm5
paddq %xmm4,%xmm0
pand %xmm7,%xmm1
psrldq $6,%xmm0
movl %eax,12(%edi)
movd %xmm0,%eax
psrldq $4,%xmm0
paddq %xmm0,%xmm5
sbbl $0,%eax
pslldq $6,%xmm5
movq %xmm2,%xmm4
paddq %xmm5,%xmm1
pand %xmm7,%xmm2
psrldq $6,%xmm1
movd %xmm1,%ebx
psrldq $4,%xmm1
movl %edx,%esp
paddq %xmm1,%xmm4
pslldq $6,%xmm4
paddq %xmm4,%xmm2
psrldq $6,%xmm2
movd %xmm2,%ecx
psrldq $4,%xmm2
sbbl $0,%ebx
movd %xmm2,%edx
pextrw $2,%xmm2,%esi
sbbl $1,%ecx
sbbl $-1,%edx
sbbl $0,%esi
subl %esi,%ebp
addl %esi,(%edi)
adcl %esi,4(%edi)
adcl %esi,8(%edi)
adcl $0,12(%edi)
adcl $0,%eax
adcl $0,%ebx
movl %eax,16(%edi)
adcl %ebp,%ecx
movl %ebx,20(%edi)
adcl %esi,%edx
movl %ecx,24(%edi)
movl %edx,28(%edi)
ret
.align 16
.L004mul_mont_ialu:
subl $40,%esp
movl (%esi),%eax
movl (%ebp),%ebx
@ -8347,7 +8902,7 @@ ecp_nistz256_scatter_w5:
movl 28(%esp),%ebp
leal 124(%edi,%ebp,4),%edi
movl $6,%ebp
.L002scatter_w5_loop:
.L006scatter_w5_loop:
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
@ -8359,7 +8914,7 @@ ecp_nistz256_scatter_w5:
movl %edx,64(%edi)
leal 256(%edi),%edi
decl %ebp
jnz .L002scatter_w5_loop
jnz .L006scatter_w5_loop
popl %edi
popl %esi
popl %ebx
@ -8474,7 +9029,7 @@ ecp_nistz256_scatter_w7:
movl 28(%esp),%ebp
leal (%edi,%ebp,1),%edi
movl $16,%ebp
.L003scatter_w7_loop:
.L007scatter_w7_loop:
movl (%esi),%eax
leal 4(%esi),%esi
movb %al,(%edi)
@ -8484,7 +9039,7 @@ ecp_nistz256_scatter_w7:
movb %ah,192(%edi)
leal 256(%edi),%edi
decl %ebp
jnz .L003scatter_w7_loop
jnz .L007scatter_w7_loop
popl %edi
popl %esi
popl %ebx
@ -8716,6 +9271,10 @@ ecp_nistz256_point_double:
pushl %edi
movl 24(%esp),%esi
subl $164,%esp
call _picup_eax
.L008pic:
leal OPENSSL_ia32cap_P,%edx
movl (%edx),%ebp
.Lpoint_double_shortcut:
movl (%esi),%eax
movl 4(%esi),%ebx
@ -8838,6 +9397,10 @@ ecp_nistz256_point_add:
pushl %edi
movl 28(%esp),%esi
subl $596,%esp
call _picup_eax
.L009pic:
leal OPENSSL_ia32cap_P,%edx
movl (%edx),%ebp
leal 192(%esp),%edi
movl (%esi),%eax
movl 4(%esi),%ebx
@ -9028,26 +9591,26 @@ ecp_nistz256_point_add:
orl 8(%edi),%eax
orl 12(%edi),%eax
.byte 62
jnz .L004add_proceed
jnz .L010add_proceed
movl 576(%esp),%eax
andl 580(%esp),%eax
movl 584(%esp),%ebx
jz .L004add_proceed
jz .L010add_proceed
testl %ebx,%ebx
jz .L005add_double
jz .L011add_double
movl 616(%esp),%edi
xorl %eax,%eax
movl $24,%ecx
.byte 252,243,171
jmp .L006add_done
jmp .L012add_done
.align 16
.L005add_double:
.L011add_double:
movl 620(%esp),%esi
movl 588(%esp),%ebp
addl $432,%esp
jmp .Lpoint_double_shortcut
.align 16
.L004add_proceed:
.L010add_proceed:
movl 588(%esp),%eax
leal 352(%esp),%esi
leal 352(%esp),%ebp
@ -9332,7 +9895,7 @@ ecp_nistz256_point_add:
orl %ebx,%eax
orl %ecx,%eax
movl %eax,60(%edi)
.L006add_done:
.L012add_done:
addl $596,%esp
popl %edi
popl %esi
@ -9351,6 +9914,10 @@ ecp_nistz256_point_add_affine:
pushl %edi
movl 24(%esp),%esi
subl $492,%esp
call _picup_eax
.L013pic:
leal OPENSSL_ia32cap_P,%edx
movl (%edx),%ebp
leal 96(%esp),%edi
movl (%esi),%eax
movl 4(%esi),%ebx
@ -9768,4 +10335,5 @@ ecp_nistz256_point_add_affine:
popl %ebp
ret
.size ecp_nistz256_point_add_affine,.-.L_ecp_nistz256_point_add_affine_begin
.comm OPENSSL_ia32cap_P,16,4
#endif
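
The new _ecp_nistz256_mul_mont prologue above gates its SSE2 body on two OPENSSL_ia32cap_P bits at once: andl $83886080,%eax / cmpl $83886080,%eax demands the full mask 0x5000000, i.e. CPUID.1:EDX bit 24 (FXSR) and bit 26 (SSE2), and falls through to the integer-only .L004mul_mont_ialu otherwise. A minimal C sketch of that dispatch pattern follows; the capability word and the two stub paths are stand-ins, not OpenSSL's real definitions.

#include <stdio.h>

/* Stand-in for OpenSSL's capability vector; the real one is filled in
   from CPUID by OPENSSL_cpuid_setup(). */
static unsigned int OPENSSL_ia32cap_fake[4] = { 0x05000000u, 0, 0, 0 };

#define CAP_FXSR (1u << 24)            /* CPUID.1:EDX bit 24 */
#define CAP_SSE2 (1u << 26)            /* CPUID.1:EDX bit 26 */

static void mul_mont_sse2(void) { puts("pmuludq path"); }
static void mul_mont_ialu(void) { puts("integer path"); }

int main(void)
{
    /* 83886080 == 0x5000000 == CAP_FXSR|CAP_SSE2: both bits must be set,
       mirroring "andl $83886080,%eax; cmpl $83886080,%eax; jne ...ialu". */
    if ((OPENSSL_ia32cap_fake[0] & (CAP_FXSR | CAP_SSE2))
            == (CAP_FXSR | CAP_SSE2))
        mul_mont_sse2();
    else
        mul_mont_ialu();
    return 0;
}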

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -250,6 +250,18 @@ bn_GF2m_mul_2x2:
movl 4(%edx),%edx
testl $8388608,%eax
jz .L001ialu
testl $16777216,%eax
jz .L002mmx
testl $2,%edx
jz .L002mmx
movups 8(%esp),%xmm0
shufps $177,%xmm0,%xmm0
.byte 102,15,58,68,192,1
movl 4(%esp),%eax
movups %xmm0,(%eax)
ret
.align 16
.L002mmx:
pushl %ebp
pushl %ebx
pushl %esi
@ -581,6 +593,18 @@ bn_GF2m_mul_2x2:
movl 4(%edx),%edx
testl $8388608,%eax
jz .L000ialu
testl $16777216,%eax
jz .L001mmx
testl $2,%edx
jz .L001mmx
movups 8(%esp),%xmm0
shufps $177,%xmm0,%xmm0
.byte 102,15,58,68,192,1
movl 4(%esp),%eax
movups %xmm0,(%eax)
ret
.align 16
.L001mmx:
pushl %ebp
pushl %ebx
pushl %esi
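
The new early-out added to both halves of bn_GF2m_mul_2x2 checks OPENSSL_ia32cap_P word 0 bit 24 (FXSR, the testl $16777216 mask) and word 1 bit 1 (PCLMULQDQ, the testl $2 on %edx) before using the .byte 102,15,58,68,192,1 sequence, which encodes pclmulqdq $1,%xmm0,%xmm0: one carry-less multiply replacing the whole MMX routine. A hedged intrinsics sketch of that 64x64 -> 128-bit carry-less product (the packing detail differs from the movups/shufps sequence; compile with a pclmul-capable compiler, e.g. cc -msse2 -mpclmul):

#include <stdint.h>
#include <stdio.h>
#include <wmmintrin.h>                 /* _mm_clmulepi64_si128 */

/* r[0..1] = a (x) b over GF(2)[x], the operation the pclmulqdq byte
   sequence performs once both operands sit in xmm0. */
static void clmul_1x1(uint64_t r[2], uint64_t a, uint64_t b)
{
    __m128i x = _mm_set_epi64x((long long)b, (long long)a);
    __m128i p = _mm_clmulepi64_si128(x, x, 0x01); /* low qword * high qword */
    _mm_storeu_si128((__m128i *)r, p);            /* cf. movups %xmm0,(%eax) */
}

int main(void)
{
    uint64_t r[2];
    clmul_1x1(r, 0x87, 0x8000000000000000ULL);
    printf("%016llx %016llx\n", (unsigned long long)r[1],
                                (unsigned long long)r[0]);
    return 0;
}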


@ -59,6 +59,126 @@ bn_mul_mont:
movl %esi,20(%esp)
leal -3(%edi),%ebx
movl %edx,24(%esp)
call .L003PIC_me_up
.L003PIC_me_up:
popl %eax
leal OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc .L004non_sse2
movl $-1,%eax
movd %eax,%mm7
movl 8(%esp),%esi
movl 12(%esp),%edi
movl 16(%esp),%ebp
xorl %edx,%edx
xorl %ecx,%ecx
movd (%edi),%mm4
movd (%esi),%mm5
movd (%ebp),%mm3
pmuludq %mm4,%mm5
movq %mm5,%mm2
movq %mm5,%mm0
pand %mm7,%mm0
pmuludq 20(%esp),%mm5
pmuludq %mm5,%mm3
paddq %mm0,%mm3
movd 4(%ebp),%mm1
movd 4(%esi),%mm0
psrlq $32,%mm2
psrlq $32,%mm3
incl %ecx
.align 16
.L0051st:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
movd 4(%ebp,%ecx,4),%mm1
paddq %mm0,%mm3
movd 4(%esi,%ecx,4),%mm0
psrlq $32,%mm2
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm3
leal 1(%ecx),%ecx
cmpl %ebx,%ecx
jl .L0051st
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
paddq %mm0,%mm3
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm2
psrlq $32,%mm3
paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4)
incl %edx
.L006outer:
xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4
movd (%esi),%mm5
movd 32(%esp),%mm6
movd (%ebp),%mm3
pmuludq %mm4,%mm5
paddq %mm6,%mm5
movq %mm5,%mm0
movq %mm5,%mm2
pand %mm7,%mm0
pmuludq 20(%esp),%mm5
pmuludq %mm5,%mm3
paddq %mm0,%mm3
movd 36(%esp),%mm6
movd 4(%ebp),%mm1
movd 4(%esi),%mm0
psrlq $32,%mm2
psrlq $32,%mm3
paddq %mm6,%mm2
incl %ecx
decl %ebx
.L007inner:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
movd 36(%esp,%ecx,4),%mm6
pand %mm7,%mm0
movd 4(%ebp,%ecx,4),%mm1
paddq %mm0,%mm3
movd 4(%esi,%ecx,4),%mm0
psrlq $32,%mm2
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm3
paddq %mm6,%mm2
decl %ebx
leal 1(%ecx),%ecx
jnz .L007inner
movl %ecx,%ebx
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
paddq %mm0,%mm3
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm2
psrlq $32,%mm3
movd 36(%esp,%ebx,4),%mm6
paddq %mm2,%mm3
paddq %mm6,%mm3
movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx
cmpl %ebx,%edx
jle .L006outer
emms
jmp .L008common_tail
.align 16
.L004non_sse2:
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
@ -69,12 +189,12 @@ bn_mul_mont:
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
movl (%edi),%edi
jz .L003bn_sqr_mont
jz .L009bn_sqr_mont
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 16
.L004mull:
.L010mull:
movl %edx,%ebp
mull %edi
addl %eax,%ebp
@ -83,7 +203,7 @@ bn_mul_mont:
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl .L004mull
jl .L010mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
@ -101,9 +221,9 @@ bn_mul_mont:
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
jmp .L0052ndmadd
jmp .L0112ndmadd
.align 16
.L0061stmadd:
.L0121stmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -114,7 +234,7 @@ bn_mul_mont:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl .L0061stmadd
jl .L0121stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
@ -137,7 +257,7 @@ bn_mul_mont:
adcl $0,%edx
movl $1,%ecx
.align 16
.L0052ndmadd:
.L0112ndmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -148,7 +268,7 @@ bn_mul_mont:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl .L0052ndmadd
jl .L0112ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@ -164,16 +284,16 @@ bn_mul_mont:
movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4)
je .L007common_tail
je .L008common_tail
movl (%ecx),%edi
movl 8(%esp),%esi
movl %ecx,12(%esp)
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
jmp .L0061stmadd
jmp .L0121stmadd
.align 16
.L003bn_sqr_mont:
.L009bn_sqr_mont:
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
@ -184,7 +304,7 @@ bn_mul_mont:
andl $1,%ebx
incl %ecx
.align 16
.L008sqr:
.L013sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -196,7 +316,7 @@ bn_mul_mont:
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
jl .L008sqr
jl .L013sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -220,7 +340,7 @@ bn_mul_mont:
movl 4(%esi),%eax
movl $1,%ecx
.align 16
.L0093rdmadd:
.L0143rdmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -239,7 +359,7 @@ bn_mul_mont:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl .L0093rdmadd
jl .L0143rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@ -255,7 +375,7 @@ bn_mul_mont:
movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4)
je .L007common_tail
je .L008common_tail
movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx
movl %edi,%eax
@ -267,12 +387,12 @@ bn_mul_mont:
xorl %ebp,%ebp
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
je .L010sqrlast
je .L015sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 16
.L011sqradd:
.L016sqradd:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -288,13 +408,13 @@ bn_mul_mont:
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
jle .L011sqradd
jle .L016sqradd
movl %edx,%ebp
addl %edx,%edx
shrl $31,%ebp
addl %ebx,%edx
adcl $0,%ebp
.L010sqrlast:
.L015sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
@ -309,9 +429,9 @@ bn_mul_mont:
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
jmp .L0093rdmadd
jmp .L0143rdmadd
.align 16
.L007common_tail:
.L008common_tail:
movl 16(%esp),%ebp
movl 4(%esp),%edi
leal 32(%esp),%esi
@ -319,19 +439,19 @@ bn_mul_mont:
movl %ebx,%ecx
xorl %edx,%edx
.align 16
.L012sub:
.L017sub:
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
jge .L012sub
jge .L017sub
sbbl $0,%eax
movl $-1,%edx
xorl %eax,%edx
jmp .L013copy
jmp .L018copy
.align 16
.L013copy:
.L018copy:
movl 32(%esp,%ebx,4),%esi
movl (%edi,%ebx,4),%ebp
movl %ecx,32(%esp,%ebx,4)
@ -340,7 +460,7 @@ bn_mul_mont:
orl %esi,%ebp
movl %ebp,(%edi,%ebx,4)
decl %ebx
jge .L013copy
jge .L018copy
movl 24(%esp),%esp
movl $1,%eax
.L000just_leave:
@ -355,6 +475,7 @@ bn_mul_mont:
.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte 111,114,103,62,0
.comm OPENSSL_ia32cap_P,16,4
#else
.text
.globl bn_mul_mont
@ -414,6 +535,123 @@ bn_mul_mont:
movl %esi,20(%esp)
leal -3(%edi),%ebx
movl %edx,24(%esp)
leal OPENSSL_ia32cap_P,%eax
btl $26,(%eax)
jnc .L003non_sse2
movl $-1,%eax
movd %eax,%mm7
movl 8(%esp),%esi
movl 12(%esp),%edi
movl 16(%esp),%ebp
xorl %edx,%edx
xorl %ecx,%ecx
movd (%edi),%mm4
movd (%esi),%mm5
movd (%ebp),%mm3
pmuludq %mm4,%mm5
movq %mm5,%mm2
movq %mm5,%mm0
pand %mm7,%mm0
pmuludq 20(%esp),%mm5
pmuludq %mm5,%mm3
paddq %mm0,%mm3
movd 4(%ebp),%mm1
movd 4(%esi),%mm0
psrlq $32,%mm2
psrlq $32,%mm3
incl %ecx
.align 16
.L0041st:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
movd 4(%ebp,%ecx,4),%mm1
paddq %mm0,%mm3
movd 4(%esi,%ecx,4),%mm0
psrlq $32,%mm2
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm3
leal 1(%ecx),%ecx
cmpl %ebx,%ecx
jl .L0041st
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
paddq %mm0,%mm3
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm2
psrlq $32,%mm3
paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4)
incl %edx
.L005outer:
xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4
movd (%esi),%mm5
movd 32(%esp),%mm6
movd (%ebp),%mm3
pmuludq %mm4,%mm5
paddq %mm6,%mm5
movq %mm5,%mm0
movq %mm5,%mm2
pand %mm7,%mm0
pmuludq 20(%esp),%mm5
pmuludq %mm5,%mm3
paddq %mm0,%mm3
movd 36(%esp),%mm6
movd 4(%ebp),%mm1
movd 4(%esi),%mm0
psrlq $32,%mm2
psrlq $32,%mm3
paddq %mm6,%mm2
incl %ecx
decl %ebx
.L006inner:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
movd 36(%esp,%ecx,4),%mm6
pand %mm7,%mm0
movd 4(%ebp,%ecx,4),%mm1
paddq %mm0,%mm3
movd 4(%esi,%ecx,4),%mm0
psrlq $32,%mm2
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm3
paddq %mm6,%mm2
decl %ebx
leal 1(%ecx),%ecx
jnz .L006inner
movl %ecx,%ebx
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
paddq %mm0,%mm3
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm2
psrlq $32,%mm3
movd 36(%esp,%ebx,4),%mm6
paddq %mm2,%mm3
paddq %mm6,%mm3
movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx
cmpl %ebx,%edx
jle .L005outer
emms
jmp .L007common_tail
.align 16
.L003non_sse2:
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
@ -424,12 +662,12 @@ bn_mul_mont:
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
movl (%edi),%edi
jz .L003bn_sqr_mont
jz .L008bn_sqr_mont
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 16
.L004mull:
.L009mull:
movl %edx,%ebp
mull %edi
addl %eax,%ebp
@ -438,7 +676,7 @@ bn_mul_mont:
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl .L004mull
jl .L009mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
@ -456,9 +694,9 @@ bn_mul_mont:
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
jmp .L0052ndmadd
jmp .L0102ndmadd
.align 16
.L0061stmadd:
.L0111stmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -469,7 +707,7 @@ bn_mul_mont:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl .L0061stmadd
jl .L0111stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
@ -492,7 +730,7 @@ bn_mul_mont:
adcl $0,%edx
movl $1,%ecx
.align 16
.L0052ndmadd:
.L0102ndmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -503,7 +741,7 @@ bn_mul_mont:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl .L0052ndmadd
jl .L0102ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@ -526,9 +764,9 @@ bn_mul_mont:
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
jmp .L0061stmadd
jmp .L0111stmadd
.align 16
.L003bn_sqr_mont:
.L008bn_sqr_mont:
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
@ -539,7 +777,7 @@ bn_mul_mont:
andl $1,%ebx
incl %ecx
.align 16
.L008sqr:
.L012sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -551,7 +789,7 @@ bn_mul_mont:
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
jl .L008sqr
jl .L012sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -575,7 +813,7 @@ bn_mul_mont:
movl 4(%esi),%eax
movl $1,%ecx
.align 16
.L0093rdmadd:
.L0133rdmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -594,7 +832,7 @@ bn_mul_mont:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl .L0093rdmadd
jl .L0133rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@ -622,12 +860,12 @@ bn_mul_mont:
xorl %ebp,%ebp
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
je .L010sqrlast
je .L014sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 16
.L011sqradd:
.L015sqradd:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -643,13 +881,13 @@ bn_mul_mont:
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
jle .L011sqradd
jle .L015sqradd
movl %edx,%ebp
addl %edx,%edx
shrl $31,%ebp
addl %ebx,%edx
adcl $0,%ebp
.L010sqrlast:
.L014sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
@ -664,7 +902,7 @@ bn_mul_mont:
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
jmp .L0093rdmadd
jmp .L0133rdmadd
.align 16
.L007common_tail:
movl 16(%esp),%ebp
@ -674,19 +912,19 @@ bn_mul_mont:
movl %ebx,%ecx
xorl %edx,%edx
.align 16
.L012sub:
.L016sub:
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
jge .L012sub
jge .L016sub
sbbl $0,%eax
movl $-1,%edx
xorl %eax,%edx
jmp .L013copy
jmp .L017copy
.align 16
.L013copy:
.L017copy:
movl 32(%esp,%ebx,4),%esi
movl (%edi,%ebx,4),%ebp
movl %ecx,32(%esp,%ebx,4)
@ -695,7 +933,7 @@ bn_mul_mont:
orl %esi,%ebp
movl %ebp,(%edi,%ebx,4)
decl %ebx
jge .L013copy
jge .L017copy
movl 24(%esp),%esp
movl $1,%eax
.L000just_leave:
@ -710,4 +948,5 @@ bn_mul_mont:
.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte 111,114,103,62,0
.comm OPENSSL_ia32cap_P,16,4
#endif
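
bn_mul_mont, in both the new pmuludq form and the existing mull/adcl form, is word-serial Montgomery multiplication: accumulate one word of b per outer pass, fold in m = t[0]*n0 to zero the bottom word, shift down one word, and finish with the conditional subtraction visible at the .Lsub/.Lcopy tails. A compact C sketch of that algorithm follows; it is a textbook CIOS-style reference under the assumption n0 = -n^{-1} mod 2^32 (the value the caller passes in), not a transcription of the register-level code.

#include <stdint.h>
#include <stdio.h>

/* -n^{-1} mod 2^32 for odd n, via Newton iteration (sketch helper). */
static uint32_t n0_inverse(uint32_t n)
{
    uint32_t x = n;                      /* n*x == 1 (mod 2^3) for odd n */
    for (int i = 0; i < 4; i++)
        x *= 2 - n * x;                  /* each step doubles valid bits */
    return (uint32_t)0 - x;
}

/* rp = ap * bp * 2^(-32*num) mod np -- word-serial Montgomery multiply. */
static void mont_mul(uint32_t *rp, const uint32_t *ap, const uint32_t *bp,
                     const uint32_t *np, uint32_t n0, int num)
{
    uint32_t t[num + 2], d[num];
    for (int j = 0; j < num + 2; j++) t[j] = 0;

    for (int i = 0; i < num; i++) {
        uint64_t v, c = 0;
        for (int j = 0; j < num; j++) {  /* t += ap * bp[i] */
            v = (uint64_t)ap[j] * bp[i] + t[j] + c;
            t[j] = (uint32_t)v; c = v >> 32;
        }
        v = (uint64_t)t[num] + c;
        t[num] = (uint32_t)v; t[num + 1] = (uint32_t)(v >> 32);

        uint32_t m = t[0] * n0;          /* makes t[0] + m*np[0] == 0 mod 2^32 */
        c = ((uint64_t)m * np[0] + t[0]) >> 32;
        for (int j = 1; j < num; j++) {  /* t = (t + m*np) >> 32 */
            v = (uint64_t)m * np[j] + t[j] + c;
            t[j - 1] = (uint32_t)v; c = v >> 32;
        }
        v = (uint64_t)t[num] + c;
        t[num - 1] = (uint32_t)v;
        t[num] = t[num + 1] + (uint32_t)(v >> 32);
        t[num + 1] = 0;
    }

    uint64_t b = 0;                      /* conditional subtraction, */
    for (int j = 0; j < num; j++) {      /* cf. the .Lsub/.Lcopy tails */
        uint64_t v = (uint64_t)t[j] - np[j] - b;
        d[j] = (uint32_t)v; b = (v >> 32) & 1;
    }
    uint32_t keep = (uint32_t)0 - (uint32_t)(t[num] >= b);   /* t >= n? */
    for (int j = 0; j < num; j++)
        rp[j] = (d[j] & keep) | (t[j] & ~keep);
}

int main(void)                           /* one-word self-test */
{
    uint32_t n[1] = { 0xfffffffbu }, n0 = n0_inverse(n[0]);
    uint32_t r2[1] = { 25 };             /* (2^32 mod n)^2 mod n = 5*5 */
    uint32_t a[1] = { 123456789 }, bw[1] = { 987654321 }, one[1] = { 1 };
    uint32_t aR[1], bR[1], abR[1], ab[1];

    mont_mul(aR, a, r2, n, n0, 1);       /* into Montgomery form */
    mont_mul(bR, bw, r2, n, n0, 1);
    mont_mul(abR, aR, bR, n, n0, 1);     /* product, still *R    */
    mont_mul(ab, abR, one, n, n0, 1);    /* back to plain form   */
    printf("%u vs %u\n", ab[0],
           (uint32_t)((uint64_t)a[0] * bw[0] % n[0]));
    return 0;
}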


@ -236,6 +236,18 @@ OPENSSL_wipe_cpu:
movl (%ecx),%ecx
btl $1,(%ecx)
jnc .L016no_x87
andl $83886080,%ecx
cmpl $83886080,%ecx
jne .L017no_sse2
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
.L017no_sse2:
.long 4007259865,4007259865,4007259865,4007259865,2430851995
.L016no_x87:
leal 4(%esp),%eax
@ -251,11 +263,11 @@ OPENSSL_atomic_add:
pushl %ebx
nop
movl (%edx),%eax
.L017spin:
.L018spin:
leal (%eax,%ecx,1),%ebx
nop
.long 447811568
jne .L017spin
jne .L018spin
movl %ebx,%eax
popl %ebx
ret
@ -269,32 +281,32 @@ OPENSSL_cleanse:
movl 8(%esp),%ecx
xorl %eax,%eax
cmpl $7,%ecx
jae .L018lot
jae .L019lot
cmpl $0,%ecx
je .L019ret
.L020little:
je .L020ret
.L021little:
movb %al,(%edx)
subl $1,%ecx
leal 1(%edx),%edx
jnz .L020little
.L019ret:
jnz .L021little
.L020ret:
ret
.align 16
.L018lot:
.L019lot:
testl $3,%edx
jz .L021aligned
jz .L022aligned
movb %al,(%edx)
leal -1(%ecx),%ecx
leal 1(%edx),%edx
jmp .L018lot
.L021aligned:
jmp .L019lot
.L022aligned:
movl %eax,(%edx)
leal -4(%ecx),%ecx
testl $-4,%ecx
leal 4(%edx),%edx
jnz .L021aligned
jnz .L022aligned
cmpl $0,%ecx
jne .L020little
jne .L021little
ret
.size OPENSSL_cleanse,.-.L_OPENSSL_cleanse_begin
.globl CRYPTO_memcmp
@ -310,18 +322,18 @@ CRYPTO_memcmp:
xorl %eax,%eax
xorl %edx,%edx
cmpl $0,%ecx
je .L022no_data
.L023loop:
je .L023no_data
.L024loop:
movb (%esi),%dl
leal 1(%esi),%esi
xorb (%edi),%dl
leal 1(%edi),%edi
orb %dl,%al
decl %ecx
jnz .L023loop
jnz .L024loop
negl %eax
shrl $31,%eax
.L022no_data:
.L023no_data:
popl %edi
popl %esi
ret
@ -336,6 +348,38 @@ OPENSSL_instrument_bus:
pushl %esi
pushl %edi
movl $0,%eax
call .L025PIC_me_up
.L025PIC_me_up:
popl %edx
leal OPENSSL_ia32cap_P-.L025PIC_me_up(%edx),%edx
btl $4,(%edx)
jnc .L026nogo
btl $19,(%edx)
jnc .L026nogo
movl 20(%esp),%edi
movl 24(%esp),%ecx
.byte 0x0f,0x31
movl %eax,%esi
movl $0,%ebx
clflush (%edi)
.byte 240
addl %ebx,(%edi)
jmp .L027loop
.align 16
.L027loop:
.byte 0x0f,0x31
movl %eax,%edx
subl %esi,%eax
movl %edx,%esi
movl %eax,%ebx
clflush (%edi)
.byte 240
addl %eax,(%edi)
leal 4(%edi),%edi
subl $1,%ecx
jnz .L027loop
movl 24(%esp),%eax
.L026nogo:
popl %edi
popl %esi
popl %ebx
@ -352,6 +396,51 @@ OPENSSL_instrument_bus2:
pushl %esi
pushl %edi
movl $0,%eax
call .L028PIC_me_up
.L028PIC_me_up:
popl %edx
leal OPENSSL_ia32cap_P-.L028PIC_me_up(%edx),%edx
btl $4,(%edx)
jnc .L029nogo
btl $19,(%edx)
jnc .L029nogo
movl 20(%esp),%edi
movl 24(%esp),%ecx
movl 28(%esp),%ebp
.byte 0x0f,0x31
movl %eax,%esi
movl $0,%ebx
clflush (%edi)
.byte 240
addl %ebx,(%edi)
.byte 0x0f,0x31
movl %eax,%edx
subl %esi,%eax
movl %edx,%esi
movl %eax,%ebx
jmp .L030loop2
.align 16
.L030loop2:
clflush (%edi)
.byte 240
addl %eax,(%edi)
subl $1,%ebp
jz .L031done2
.byte 0x0f,0x31
movl %eax,%edx
subl %esi,%eax
movl %edx,%esi
cmpl %ebx,%eax
movl %eax,%ebx
movl $0,%edx
setne %dl
subl %edx,%ecx
leal (%edi,%edx,4),%edi
jnz .L030loop2
.L031done2:
movl 24(%esp),%eax
subl %ecx,%eax
.L029nogo:
popl %edi
popl %esi
popl %ebx
@ -369,33 +458,33 @@ OPENSSL_ia32_rdrand_bytes:
movl 12(%esp),%edi
movl 16(%esp),%ebx
cmpl $0,%ebx
je .L024done
je .L032done
movl $8,%ecx
.L025loop:
.L033loop:
.byte 15,199,242
jc .L026break
loop .L025loop
jmp .L024done
jc .L034break
loop .L033loop
jmp .L032done
.align 16
.L026break:
.L034break:
cmpl $4,%ebx
jb .L027tail
jb .L035tail
movl %edx,(%edi)
leal 4(%edi),%edi
addl $4,%eax
subl $4,%ebx
jz .L024done
jz .L032done
movl $8,%ecx
jmp .L025loop
jmp .L033loop
.align 16
.L027tail:
.L035tail:
movb %dl,(%edi)
leal 1(%edi),%edi
incl %eax
shrl $8,%edx
decl %ebx
jnz .L027tail
.L024done:
jnz .L035tail
.L032done:
xorl %edx,%edx
popl %ebx
popl %edi
@ -412,33 +501,33 @@ OPENSSL_ia32_rdseed_bytes:
movl 12(%esp),%edi
movl 16(%esp),%ebx
cmpl $0,%ebx
je .L028done
je .L036done
movl $8,%ecx
.L029loop:
.L037loop:
.byte 15,199,250
jc .L030break
loop .L029loop
jmp .L028done
jc .L038break
loop .L037loop
jmp .L036done
.align 16
.L030break:
.L038break:
cmpl $4,%ebx
jb .L031tail
jb .L039tail
movl %edx,(%edi)
leal 4(%edi),%edi
addl $4,%eax
subl $4,%ebx
jz .L028done
jz .L036done
movl $8,%ecx
jmp .L029loop
jmp .L037loop
.align 16
.L031tail:
.L039tail:
movb %dl,(%edi)
leal 1(%edi),%edi
incl %eax
shrl $8,%edx
decl %ebx
jnz .L031tail
.L028done:
jnz .L039tail
.L036done:
xorl %edx,%edx
popl %ebx
popl %edi
@ -676,6 +765,18 @@ OPENSSL_wipe_cpu:
movl (%ecx),%ecx
btl $1,(%ecx)
jnc .L013no_x87
andl $83886080,%ecx
cmpl $83886080,%ecx
jne .L014no_sse2
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
.L014no_sse2:
.long 4007259865,4007259865,4007259865,4007259865,2430851995
.L013no_x87:
leal 4(%esp),%eax
@ -691,11 +792,11 @@ OPENSSL_atomic_add:
pushl %ebx
nop
movl (%edx),%eax
.L014spin:
.L015spin:
leal (%eax,%ecx,1),%ebx
nop
.long 447811568
jne .L014spin
jne .L015spin
movl %ebx,%eax
popl %ebx
ret
@ -709,32 +810,32 @@ OPENSSL_cleanse:
movl 8(%esp),%ecx
xorl %eax,%eax
cmpl $7,%ecx
jae .L015lot
jae .L016lot
cmpl $0,%ecx
je .L016ret
.L017little:
je .L017ret
.L018little:
movb %al,(%edx)
subl $1,%ecx
leal 1(%edx),%edx
jnz .L017little
.L016ret:
jnz .L018little
.L017ret:
ret
.align 16
.L015lot:
.L016lot:
testl $3,%edx
jz .L018aligned
jz .L019aligned
movb %al,(%edx)
leal -1(%ecx),%ecx
leal 1(%edx),%edx
jmp .L015lot
.L018aligned:
jmp .L016lot
.L019aligned:
movl %eax,(%edx)
leal -4(%ecx),%ecx
testl $-4,%ecx
leal 4(%edx),%edx
jnz .L018aligned
jnz .L019aligned
cmpl $0,%ecx
jne .L017little
jne .L018little
ret
.size OPENSSL_cleanse,.-.L_OPENSSL_cleanse_begin
.globl CRYPTO_memcmp
@ -750,18 +851,18 @@ CRYPTO_memcmp:
xorl %eax,%eax
xorl %edx,%edx
cmpl $0,%ecx
je .L019no_data
.L020loop:
je .L020no_data
.L021loop:
movb (%esi),%dl
leal 1(%esi),%esi
xorb (%edi),%dl
leal 1(%edi),%edi
orb %dl,%al
decl %ecx
jnz .L020loop
jnz .L021loop
negl %eax
shrl $31,%eax
.L019no_data:
.L020no_data:
popl %edi
popl %esi
ret
@ -776,6 +877,35 @@ OPENSSL_instrument_bus:
pushl %esi
pushl %edi
movl $0,%eax
leal OPENSSL_ia32cap_P,%edx
btl $4,(%edx)
jnc .L022nogo
btl $19,(%edx)
jnc .L022nogo
movl 20(%esp),%edi
movl 24(%esp),%ecx
.byte 0x0f,0x31
movl %eax,%esi
movl $0,%ebx
clflush (%edi)
.byte 240
addl %ebx,(%edi)
jmp .L023loop
.align 16
.L023loop:
.byte 0x0f,0x31
movl %eax,%edx
subl %esi,%eax
movl %edx,%esi
movl %eax,%ebx
clflush (%edi)
.byte 240
addl %eax,(%edi)
leal 4(%edi),%edi
subl $1,%ecx
jnz .L023loop
movl 24(%esp),%eax
.L022nogo:
popl %edi
popl %esi
popl %ebx
@ -792,6 +922,48 @@ OPENSSL_instrument_bus2:
pushl %esi
pushl %edi
movl $0,%eax
leal OPENSSL_ia32cap_P,%edx
btl $4,(%edx)
jnc .L024nogo
btl $19,(%edx)
jnc .L024nogo
movl 20(%esp),%edi
movl 24(%esp),%ecx
movl 28(%esp),%ebp
.byte 0x0f,0x31
movl %eax,%esi
movl $0,%ebx
clflush (%edi)
.byte 240
addl %ebx,(%edi)
.byte 0x0f,0x31
movl %eax,%edx
subl %esi,%eax
movl %edx,%esi
movl %eax,%ebx
jmp .L025loop2
.align 16
.L025loop2:
clflush (%edi)
.byte 240
addl %eax,(%edi)
subl $1,%ebp
jz .L026done2
.byte 0x0f,0x31
movl %eax,%edx
subl %esi,%eax
movl %edx,%esi
cmpl %ebx,%eax
movl %eax,%ebx
movl $0,%edx
setne %dl
subl %edx,%ecx
leal (%edi,%edx,4),%edi
jnz .L025loop2
.L026done2:
movl 24(%esp),%eax
subl %ecx,%eax
.L024nogo:
popl %edi
popl %esi
popl %ebx
@ -809,33 +981,33 @@ OPENSSL_ia32_rdrand_bytes:
movl 12(%esp),%edi
movl 16(%esp),%ebx
cmpl $0,%ebx
je .L021done
je .L027done
movl $8,%ecx
.L022loop:
.L028loop:
.byte 15,199,242
jc .L023break
loop .L022loop
jmp .L021done
jc .L029break
loop .L028loop
jmp .L027done
.align 16
.L023break:
.L029break:
cmpl $4,%ebx
jb .L024tail
jb .L030tail
movl %edx,(%edi)
leal 4(%edi),%edi
addl $4,%eax
subl $4,%ebx
jz .L021done
jz .L027done
movl $8,%ecx
jmp .L022loop
jmp .L028loop
.align 16
.L024tail:
.L030tail:
movb %dl,(%edi)
leal 1(%edi),%edi
incl %eax
shrl $8,%edx
decl %ebx
jnz .L024tail
.L021done:
jnz .L030tail
.L027done:
xorl %edx,%edx
popl %ebx
popl %edi
@ -852,33 +1024,33 @@ OPENSSL_ia32_rdseed_bytes:
movl 12(%esp),%edi
movl 16(%esp),%ebx
cmpl $0,%ebx
je .L025done
je .L031done
movl $8,%ecx
.L026loop:
.L032loop:
.byte 15,199,250
jc .L027break
loop .L026loop
jmp .L025done
jc .L033break
loop .L032loop
jmp .L031done
.align 16
.L027break:
.L033break:
cmpl $4,%ebx
jb .L028tail
jb .L034tail
movl %edx,(%edi)
leal 4(%edi),%edi
addl $4,%eax
subl $4,%ebx
jz .L025done
jz .L031done
movl $8,%ecx
jmp .L026loop
jmp .L032loop
.align 16
.L028tail:
.L034tail:
movb %dl,(%edi)
leal 1(%edi),%edi
incl %eax
shrl $8,%edx
decl %ebx
jnz .L028tail
.L025done:
jnz .L034tail
.L031done:
xorl %edx,%edx
popl %ebx
popl %edi
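
For completeness, the renumbered loops in OPENSSL_ia32_rdrand_bytes and OPENSSL_ia32_rdseed_bytes above implement a fixed retry budget: .byte 15,199,242 and .byte 15,199,250 encode rdrand %edx and rdseed %edx, each attempted up to eight times (movl $8,%ecx ... loop) per output word before the routine gives up on the remaining bytes. A small C sketch of the same retry discipline via the compiler intrinsic; build with e.g. cc -mrdrnd, and the hardware-capability check is assumed to have happened elsewhere, as it does for the assembly's callers.

/* build: cc -mrdrnd rdrand_retry.c */
#include <immintrin.h>
#include <stdio.h>

static int rdrand32_retry(unsigned int *out)
{
    for (int i = 0; i < 8; i++)     /* "movl $8,%ecx" retry budget */
        if (_rdrand32_step(out))    /* CF=1 from rdrand: valid data */
            return 1;
    return 0;                       /* persistent underflow: caller bails */
}

int main(void)
{
    unsigned int w;
    if (rdrand32_retry(&w))
        printf("rdrand word: %08x\n", w);
    else
        puts("rdrand exhausted");
    return 0;
}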