Regen assembly files for i386 after r338846.
This commit is contained in:
parent
6245169c78
commit
29fa2db5f4
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -2746,6 +2746,8 @@ ecp_nistz256_to_mont:
|
||||
call _picup_eax
|
||||
.L000pic:
|
||||
leal .LRR-.L000pic(%eax),%ebp
|
||||
leal OPENSSL_ia32cap_P-.L000pic(%eax),%eax
|
||||
movl (%eax),%eax
|
||||
movl 20(%esp),%edi
|
||||
call _ecp_nistz256_mul_mont
|
||||
popl %edi
|
||||
@ -2767,6 +2769,8 @@ ecp_nistz256_from_mont:
|
||||
call _picup_eax
|
||||
.L001pic:
|
||||
leal .LONE-.L001pic(%eax),%ebp
|
||||
leal OPENSSL_ia32cap_P-.L001pic(%eax),%eax
|
||||
movl (%eax),%eax
|
||||
movl 20(%esp),%edi
|
||||
call _ecp_nistz256_mul_mont
|
||||
popl %edi
|
||||
@ -2786,6 +2790,10 @@ ecp_nistz256_mul_mont:
|
||||
pushl %edi
|
||||
movl 24(%esp),%esi
|
||||
movl 28(%esp),%ebp
|
||||
call _picup_eax
|
||||
.L002pic:
|
||||
leal OPENSSL_ia32cap_P-.L002pic(%eax),%eax
|
||||
movl (%eax),%eax
|
||||
movl 20(%esp),%edi
|
||||
call _ecp_nistz256_mul_mont
|
||||
popl %edi
|
||||
@ -2804,6 +2812,10 @@ ecp_nistz256_sqr_mont:
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 24(%esp),%esi
|
||||
call _picup_eax
|
||||
.L003pic:
|
||||
leal OPENSSL_ia32cap_P-.L003pic(%eax),%eax
|
||||
movl (%eax),%eax
|
||||
movl 20(%esp),%edi
|
||||
movl %esi,%ebp
|
||||
call _ecp_nistz256_mul_mont
|
||||
@ -2816,6 +2828,265 @@ ecp_nistz256_sqr_mont:
|
||||
.type _ecp_nistz256_mul_mont,@function
|
||||
.align 16
|
||||
_ecp_nistz256_mul_mont:
|
||||
andl $83886080,%eax
|
||||
cmpl $83886080,%eax
|
||||
jne .L004mul_mont_ialu
|
||||
movl %esp,%edx
|
||||
subl $256,%esp
|
||||
movd (%ebp),%xmm7
|
||||
leal 4(%ebp),%ebp
|
||||
pcmpeqd %xmm6,%xmm6
|
||||
psrlq $48,%xmm6
|
||||
pshuflw $220,%xmm7,%xmm7
|
||||
andl $-64,%esp
|
||||
pshufd $220,%xmm7,%xmm7
|
||||
leal 128(%esp),%ebx
|
||||
movd (%esi),%xmm0
|
||||
pshufd $204,%xmm0,%xmm0
|
||||
movd 4(%esi),%xmm1
|
||||
movdqa %xmm0,(%ebx)
|
||||
pmuludq %xmm7,%xmm0
|
||||
movd 8(%esi),%xmm2
|
||||
pshufd $204,%xmm1,%xmm1
|
||||
movdqa %xmm1,16(%ebx)
|
||||
pmuludq %xmm7,%xmm1
|
||||
movq %xmm0,%xmm4
|
||||
pslldq $6,%xmm4
|
||||
paddq %xmm0,%xmm4
|
||||
movdqa %xmm4,%xmm5
|
||||
psrldq $10,%xmm4
|
||||
pand %xmm6,%xmm5
|
||||
movd 12(%esi),%xmm3
|
||||
pshufd $204,%xmm2,%xmm2
|
||||
movdqa %xmm2,32(%ebx)
|
||||
pmuludq %xmm7,%xmm2
|
||||
paddq %xmm4,%xmm1
|
||||
movdqa %xmm1,(%esp)
|
||||
movd 16(%esi),%xmm0
|
||||
pshufd $204,%xmm3,%xmm3
|
||||
movdqa %xmm3,48(%ebx)
|
||||
pmuludq %xmm7,%xmm3
|
||||
movdqa %xmm2,16(%esp)
|
||||
movd 20(%esi),%xmm1
|
||||
pshufd $204,%xmm0,%xmm0
|
||||
movdqa %xmm0,64(%ebx)
|
||||
pmuludq %xmm7,%xmm0
|
||||
paddq %xmm5,%xmm3
|
||||
movdqa %xmm3,32(%esp)
|
||||
movd 24(%esi),%xmm2
|
||||
pshufd $204,%xmm1,%xmm1
|
||||
movdqa %xmm1,80(%ebx)
|
||||
pmuludq %xmm7,%xmm1
|
||||
movdqa %xmm0,48(%esp)
|
||||
pshufd $177,%xmm5,%xmm4
|
||||
movd 28(%esi),%xmm3
|
||||
pshufd $204,%xmm2,%xmm2
|
||||
movdqa %xmm2,96(%ebx)
|
||||
pmuludq %xmm7,%xmm2
|
||||
movdqa %xmm1,64(%esp)
|
||||
psubq %xmm5,%xmm4
|
||||
movd (%ebp),%xmm0
|
||||
pshufd $204,%xmm3,%xmm3
|
||||
movdqa %xmm3,112(%ebx)
|
||||
pmuludq %xmm7,%xmm3
|
||||
pshuflw $220,%xmm0,%xmm7
|
||||
movdqa (%ebx),%xmm0
|
||||
pshufd $220,%xmm7,%xmm7
|
||||
movl $6,%ecx
|
||||
leal 4(%ebp),%ebp
|
||||
jmp .L005madd_sse2
|
||||
.align 16
|
||||
.L005madd_sse2:
|
||||
paddq %xmm5,%xmm2
|
||||
paddq %xmm4,%xmm3
|
||||
movdqa 16(%ebx),%xmm1
|
||||
pmuludq %xmm7,%xmm0
|
||||
movdqa %xmm2,80(%esp)
|
||||
movdqa 32(%ebx),%xmm2
|
||||
pmuludq %xmm7,%xmm1
|
||||
movdqa %xmm3,96(%esp)
|
||||
paddq (%esp),%xmm0
|
||||
movdqa 48(%ebx),%xmm3
|
||||
pmuludq %xmm7,%xmm2
|
||||
movq %xmm0,%xmm4
|
||||
pslldq $6,%xmm4
|
||||
paddq 16(%esp),%xmm1
|
||||
paddq %xmm0,%xmm4
|
||||
movdqa %xmm4,%xmm5
|
||||
psrldq $10,%xmm4
|
||||
movdqa 64(%ebx),%xmm0
|
||||
pmuludq %xmm7,%xmm3
|
||||
paddq %xmm4,%xmm1
|
||||
paddq 32(%esp),%xmm2
|
||||
movdqa %xmm1,(%esp)
|
||||
movdqa 80(%ebx),%xmm1
|
||||
pmuludq %xmm7,%xmm0
|
||||
paddq 48(%esp),%xmm3
|
||||
movdqa %xmm2,16(%esp)
|
||||
pand %xmm6,%xmm5
|
||||
movdqa 96(%ebx),%xmm2
|
||||
pmuludq %xmm7,%xmm1
|
||||
paddq %xmm5,%xmm3
|
||||
paddq 64(%esp),%xmm0
|
||||
movdqa %xmm3,32(%esp)
|
||||
pshufd $177,%xmm5,%xmm4
|
||||
movdqa %xmm7,%xmm3
|
||||
pmuludq %xmm7,%xmm2
|
||||
movd (%ebp),%xmm7
|
||||
leal 4(%ebp),%ebp
|
||||
paddq 80(%esp),%xmm1
|
||||
psubq %xmm5,%xmm4
|
||||
movdqa %xmm0,48(%esp)
|
||||
pshuflw $220,%xmm7,%xmm7
|
||||
pmuludq 112(%ebx),%xmm3
|
||||
pshufd $220,%xmm7,%xmm7
|
||||
movdqa (%ebx),%xmm0
|
||||
movdqa %xmm1,64(%esp)
|
||||
paddq 96(%esp),%xmm2
|
||||
decl %ecx
|
||||
jnz .L005madd_sse2
|
||||
paddq %xmm5,%xmm2
|
||||
paddq %xmm4,%xmm3
|
||||
movdqa 16(%ebx),%xmm1
|
||||
pmuludq %xmm7,%xmm0
|
||||
movdqa %xmm2,80(%esp)
|
||||
movdqa 32(%ebx),%xmm2
|
||||
pmuludq %xmm7,%xmm1
|
||||
movdqa %xmm3,96(%esp)
|
||||
paddq (%esp),%xmm0
|
||||
movdqa 48(%ebx),%xmm3
|
||||
pmuludq %xmm7,%xmm2
|
||||
movq %xmm0,%xmm4
|
||||
pslldq $6,%xmm4
|
||||
paddq 16(%esp),%xmm1
|
||||
paddq %xmm0,%xmm4
|
||||
movdqa %xmm4,%xmm5
|
||||
psrldq $10,%xmm4
|
||||
movdqa 64(%ebx),%xmm0
|
||||
pmuludq %xmm7,%xmm3
|
||||
paddq %xmm4,%xmm1
|
||||
paddq 32(%esp),%xmm2
|
||||
movdqa %xmm1,(%esp)
|
||||
movdqa 80(%ebx),%xmm1
|
||||
pmuludq %xmm7,%xmm0
|
||||
paddq 48(%esp),%xmm3
|
||||
movdqa %xmm2,16(%esp)
|
||||
pand %xmm6,%xmm5
|
||||
movdqa 96(%ebx),%xmm2
|
||||
pmuludq %xmm7,%xmm1
|
||||
paddq %xmm5,%xmm3
|
||||
paddq 64(%esp),%xmm0
|
||||
movdqa %xmm3,32(%esp)
|
||||
pshufd $177,%xmm5,%xmm4
|
||||
movdqa 112(%ebx),%xmm3
|
||||
pmuludq %xmm7,%xmm2
|
||||
paddq 80(%esp),%xmm1
|
||||
psubq %xmm5,%xmm4
|
||||
movdqa %xmm0,48(%esp)
|
||||
pmuludq %xmm7,%xmm3
|
||||
pcmpeqd %xmm7,%xmm7
|
||||
movdqa (%esp),%xmm0
|
||||
pslldq $8,%xmm7
|
||||
movdqa %xmm1,64(%esp)
|
||||
paddq 96(%esp),%xmm2
|
||||
paddq %xmm5,%xmm2
|
||||
paddq %xmm4,%xmm3
|
||||
movdqa %xmm2,80(%esp)
|
||||
movdqa %xmm3,96(%esp)
|
||||
movdqa 16(%esp),%xmm1
|
||||
movdqa 32(%esp),%xmm2
|
||||
movdqa 48(%esp),%xmm3
|
||||
movq %xmm0,%xmm4
|
||||
pand %xmm7,%xmm0
|
||||
xorl %ebp,%ebp
|
||||
pslldq $6,%xmm4
|
||||
movq %xmm1,%xmm5
|
||||
paddq %xmm4,%xmm0
|
||||
pand %xmm7,%xmm1
|
||||
psrldq $6,%xmm0
|
||||
movd %xmm0,%eax
|
||||
psrldq $4,%xmm0
|
||||
paddq %xmm0,%xmm5
|
||||
movdqa 64(%esp),%xmm0
|
||||
subl $-1,%eax
|
||||
pslldq $6,%xmm5
|
||||
movq %xmm2,%xmm4
|
||||
paddq %xmm5,%xmm1
|
||||
pand %xmm7,%xmm2
|
||||
psrldq $6,%xmm1
|
||||
movl %eax,(%edi)
|
||||
movd %xmm1,%eax
|
||||
psrldq $4,%xmm1
|
||||
paddq %xmm1,%xmm4
|
||||
movdqa 80(%esp),%xmm1
|
||||
sbbl $-1,%eax
|
||||
pslldq $6,%xmm4
|
||||
movq %xmm3,%xmm5
|
||||
paddq %xmm4,%xmm2
|
||||
pand %xmm7,%xmm3
|
||||
psrldq $6,%xmm2
|
||||
movl %eax,4(%edi)
|
||||
movd %xmm2,%eax
|
||||
psrldq $4,%xmm2
|
||||
paddq %xmm2,%xmm5
|
||||
movdqa 96(%esp),%xmm2
|
||||
sbbl $-1,%eax
|
||||
pslldq $6,%xmm5
|
||||
movq %xmm0,%xmm4
|
||||
paddq %xmm5,%xmm3
|
||||
pand %xmm7,%xmm0
|
||||
psrldq $6,%xmm3
|
||||
movl %eax,8(%edi)
|
||||
movd %xmm3,%eax
|
||||
psrldq $4,%xmm3
|
||||
paddq %xmm3,%xmm4
|
||||
sbbl $0,%eax
|
||||
pslldq $6,%xmm4
|
||||
movq %xmm1,%xmm5
|
||||
paddq %xmm4,%xmm0
|
||||
pand %xmm7,%xmm1
|
||||
psrldq $6,%xmm0
|
||||
movl %eax,12(%edi)
|
||||
movd %xmm0,%eax
|
||||
psrldq $4,%xmm0
|
||||
paddq %xmm0,%xmm5
|
||||
sbbl $0,%eax
|
||||
pslldq $6,%xmm5
|
||||
movq %xmm2,%xmm4
|
||||
paddq %xmm5,%xmm1
|
||||
pand %xmm7,%xmm2
|
||||
psrldq $6,%xmm1
|
||||
movd %xmm1,%ebx
|
||||
psrldq $4,%xmm1
|
||||
movl %edx,%esp
|
||||
paddq %xmm1,%xmm4
|
||||
pslldq $6,%xmm4
|
||||
paddq %xmm4,%xmm2
|
||||
psrldq $6,%xmm2
|
||||
movd %xmm2,%ecx
|
||||
psrldq $4,%xmm2
|
||||
sbbl $0,%ebx
|
||||
movd %xmm2,%edx
|
||||
pextrw $2,%xmm2,%esi
|
||||
sbbl $1,%ecx
|
||||
sbbl $-1,%edx
|
||||
sbbl $0,%esi
|
||||
subl %esi,%ebp
|
||||
addl %esi,(%edi)
|
||||
adcl %esi,4(%edi)
|
||||
adcl %esi,8(%edi)
|
||||
adcl $0,12(%edi)
|
||||
adcl $0,%eax
|
||||
adcl $0,%ebx
|
||||
movl %eax,16(%edi)
|
||||
adcl %ebp,%ecx
|
||||
movl %ebx,20(%edi)
|
||||
adcl %esi,%edx
|
||||
movl %ecx,24(%edi)
|
||||
movl %edx,28(%edi)
|
||||
ret
|
||||
.align 16
|
||||
.L004mul_mont_ialu:
|
||||
subl $40,%esp
|
||||
movl (%esi),%eax
|
||||
movl (%ebp),%ebx
|
||||
@ -3463,7 +3734,7 @@ ecp_nistz256_scatter_w5:
|
||||
movl 28(%esp),%ebp
|
||||
leal 124(%edi,%ebp,4),%edi
|
||||
movl $6,%ebp
|
||||
.L002scatter_w5_loop:
|
||||
.L006scatter_w5_loop:
|
||||
movl (%esi),%eax
|
||||
movl 4(%esi),%ebx
|
||||
movl 8(%esi),%ecx
|
||||
@ -3475,7 +3746,7 @@ ecp_nistz256_scatter_w5:
|
||||
movl %edx,64(%edi)
|
||||
leal 256(%edi),%edi
|
||||
decl %ebp
|
||||
jnz .L002scatter_w5_loop
|
||||
jnz .L006scatter_w5_loop
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
@ -3590,7 +3861,7 @@ ecp_nistz256_scatter_w7:
|
||||
movl 28(%esp),%ebp
|
||||
leal (%edi,%ebp,1),%edi
|
||||
movl $16,%ebp
|
||||
.L003scatter_w7_loop:
|
||||
.L007scatter_w7_loop:
|
||||
movl (%esi),%eax
|
||||
leal 4(%esi),%esi
|
||||
movb %al,(%edi)
|
||||
@ -3600,7 +3871,7 @@ ecp_nistz256_scatter_w7:
|
||||
movb %ah,192(%edi)
|
||||
leal 256(%edi),%edi
|
||||
decl %ebp
|
||||
jnz .L003scatter_w7_loop
|
||||
jnz .L007scatter_w7_loop
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
@ -3832,6 +4103,10 @@ ecp_nistz256_point_double:
|
||||
pushl %edi
|
||||
movl 24(%esp),%esi
|
||||
subl $164,%esp
|
||||
call _picup_eax
|
||||
.L008pic:
|
||||
leal OPENSSL_ia32cap_P-.L008pic(%eax),%edx
|
||||
movl (%edx),%ebp
|
||||
.Lpoint_double_shortcut:
|
||||
movl (%esi),%eax
|
||||
movl 4(%esi),%ebx
|
||||
@ -3954,6 +4229,10 @@ ecp_nistz256_point_add:
|
||||
pushl %edi
|
||||
movl 28(%esp),%esi
|
||||
subl $596,%esp
|
||||
call _picup_eax
|
||||
.L009pic:
|
||||
leal OPENSSL_ia32cap_P-.L009pic(%eax),%edx
|
||||
movl (%edx),%ebp
|
||||
leal 192(%esp),%edi
|
||||
movl (%esi),%eax
|
||||
movl 4(%esi),%ebx
|
||||
@ -4144,26 +4423,26 @@ ecp_nistz256_point_add:
|
||||
orl 8(%edi),%eax
|
||||
orl 12(%edi),%eax
|
||||
.byte 62
|
||||
jnz .L004add_proceed
|
||||
jnz .L010add_proceed
|
||||
movl 576(%esp),%eax
|
||||
andl 580(%esp),%eax
|
||||
movl 584(%esp),%ebx
|
||||
jz .L004add_proceed
|
||||
jz .L010add_proceed
|
||||
testl %ebx,%ebx
|
||||
jz .L005add_double
|
||||
jz .L011add_double
|
||||
movl 616(%esp),%edi
|
||||
xorl %eax,%eax
|
||||
movl $24,%ecx
|
||||
.byte 252,243,171
|
||||
jmp .L006add_done
|
||||
jmp .L012add_done
|
||||
.align 16
|
||||
.L005add_double:
|
||||
.L011add_double:
|
||||
movl 620(%esp),%esi
|
||||
movl 588(%esp),%ebp
|
||||
addl $432,%esp
|
||||
jmp .Lpoint_double_shortcut
|
||||
.align 16
|
||||
.L004add_proceed:
|
||||
.L010add_proceed:
|
||||
movl 588(%esp),%eax
|
||||
leal 352(%esp),%esi
|
||||
leal 352(%esp),%ebp
|
||||
@ -4448,7 +4727,7 @@ ecp_nistz256_point_add:
|
||||
orl %ebx,%eax
|
||||
orl %ecx,%eax
|
||||
movl %eax,60(%edi)
|
||||
.L006add_done:
|
||||
.L012add_done:
|
||||
addl $596,%esp
|
||||
popl %edi
|
||||
popl %esi
|
||||
@ -4467,6 +4746,10 @@ ecp_nistz256_point_add_affine:
|
||||
pushl %edi
|
||||
movl 24(%esp),%esi
|
||||
subl $492,%esp
|
||||
call _picup_eax
|
||||
.L013pic:
|
||||
leal OPENSSL_ia32cap_P-.L013pic(%eax),%edx
|
||||
movl (%edx),%ebp
|
||||
leal 96(%esp),%edi
|
||||
movl (%esi),%eax
|
||||
movl 4(%esi),%ebx
|
||||
@ -4884,6 +5167,7 @@ ecp_nistz256_point_add_affine:
|
||||
popl %ebp
|
||||
ret
|
||||
.size ecp_nistz256_point_add_affine,.-.L_ecp_nistz256_point_add_affine_begin
|
||||
.comm OPENSSL_ia32cap_P,16,4
|
||||
#else
|
||||
.text
|
||||
.globl ecp_nistz256_precomputed
|
||||
@ -7630,6 +7914,8 @@ ecp_nistz256_to_mont:
|
||||
call _picup_eax
|
||||
.L000pic:
|
||||
leal .LRR-.L000pic(%eax),%ebp
|
||||
leal OPENSSL_ia32cap_P,%eax
|
||||
movl (%eax),%eax
|
||||
movl 20(%esp),%edi
|
||||
call _ecp_nistz256_mul_mont
|
||||
popl %edi
|
||||
@ -7651,6 +7937,8 @@ ecp_nistz256_from_mont:
|
||||
call _picup_eax
|
||||
.L001pic:
|
||||
leal .LONE-.L001pic(%eax),%ebp
|
||||
leal OPENSSL_ia32cap_P,%eax
|
||||
movl (%eax),%eax
|
||||
movl 20(%esp),%edi
|
||||
call _ecp_nistz256_mul_mont
|
||||
popl %edi
|
||||
@ -7670,6 +7958,10 @@ ecp_nistz256_mul_mont:
|
||||
pushl %edi
|
||||
movl 24(%esp),%esi
|
||||
movl 28(%esp),%ebp
|
||||
call _picup_eax
|
||||
.L002pic:
|
||||
leal OPENSSL_ia32cap_P,%eax
|
||||
movl (%eax),%eax
|
||||
movl 20(%esp),%edi
|
||||
call _ecp_nistz256_mul_mont
|
||||
popl %edi
|
||||
@ -7688,6 +7980,10 @@ ecp_nistz256_sqr_mont:
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 24(%esp),%esi
|
||||
call _picup_eax
|
||||
.L003pic:
|
||||
leal OPENSSL_ia32cap_P,%eax
|
||||
movl (%eax),%eax
|
||||
movl 20(%esp),%edi
|
||||
movl %esi,%ebp
|
||||
call _ecp_nistz256_mul_mont
|
||||
@ -7700,6 +7996,265 @@ ecp_nistz256_sqr_mont:
|
||||
.type _ecp_nistz256_mul_mont,@function
|
||||
.align 16
|
||||
_ecp_nistz256_mul_mont:
|
||||
andl $83886080,%eax
|
||||
cmpl $83886080,%eax
|
||||
jne .L004mul_mont_ialu
|
||||
movl %esp,%edx
|
||||
subl $256,%esp
|
||||
movd (%ebp),%xmm7
|
||||
leal 4(%ebp),%ebp
|
||||
pcmpeqd %xmm6,%xmm6
|
||||
psrlq $48,%xmm6
|
||||
pshuflw $220,%xmm7,%xmm7
|
||||
andl $-64,%esp
|
||||
pshufd $220,%xmm7,%xmm7
|
||||
leal 128(%esp),%ebx
|
||||
movd (%esi),%xmm0
|
||||
pshufd $204,%xmm0,%xmm0
|
||||
movd 4(%esi),%xmm1
|
||||
movdqa %xmm0,(%ebx)
|
||||
pmuludq %xmm7,%xmm0
|
||||
movd 8(%esi),%xmm2
|
||||
pshufd $204,%xmm1,%xmm1
|
||||
movdqa %xmm1,16(%ebx)
|
||||
pmuludq %xmm7,%xmm1
|
||||
movq %xmm0,%xmm4
|
||||
pslldq $6,%xmm4
|
||||
paddq %xmm0,%xmm4
|
||||
movdqa %xmm4,%xmm5
|
||||
psrldq $10,%xmm4
|
||||
pand %xmm6,%xmm5
|
||||
movd 12(%esi),%xmm3
|
||||
pshufd $204,%xmm2,%xmm2
|
||||
movdqa %xmm2,32(%ebx)
|
||||
pmuludq %xmm7,%xmm2
|
||||
paddq %xmm4,%xmm1
|
||||
movdqa %xmm1,(%esp)
|
||||
movd 16(%esi),%xmm0
|
||||
pshufd $204,%xmm3,%xmm3
|
||||
movdqa %xmm3,48(%ebx)
|
||||
pmuludq %xmm7,%xmm3
|
||||
movdqa %xmm2,16(%esp)
|
||||
movd 20(%esi),%xmm1
|
||||
pshufd $204,%xmm0,%xmm0
|
||||
movdqa %xmm0,64(%ebx)
|
||||
pmuludq %xmm7,%xmm0
|
||||
paddq %xmm5,%xmm3
|
||||
movdqa %xmm3,32(%esp)
|
||||
movd 24(%esi),%xmm2
|
||||
pshufd $204,%xmm1,%xmm1
|
||||
movdqa %xmm1,80(%ebx)
|
||||
pmuludq %xmm7,%xmm1
|
||||
movdqa %xmm0,48(%esp)
|
||||
pshufd $177,%xmm5,%xmm4
|
||||
movd 28(%esi),%xmm3
|
||||
pshufd $204,%xmm2,%xmm2
|
||||
movdqa %xmm2,96(%ebx)
|
||||
pmuludq %xmm7,%xmm2
|
||||
movdqa %xmm1,64(%esp)
|
||||
psubq %xmm5,%xmm4
|
||||
movd (%ebp),%xmm0
|
||||
pshufd $204,%xmm3,%xmm3
|
||||
movdqa %xmm3,112(%ebx)
|
||||
pmuludq %xmm7,%xmm3
|
||||
pshuflw $220,%xmm0,%xmm7
|
||||
movdqa (%ebx),%xmm0
|
||||
pshufd $220,%xmm7,%xmm7
|
||||
movl $6,%ecx
|
||||
leal 4(%ebp),%ebp
|
||||
jmp .L005madd_sse2
|
||||
.align 16
|
||||
.L005madd_sse2:
|
||||
paddq %xmm5,%xmm2
|
||||
paddq %xmm4,%xmm3
|
||||
movdqa 16(%ebx),%xmm1
|
||||
pmuludq %xmm7,%xmm0
|
||||
movdqa %xmm2,80(%esp)
|
||||
movdqa 32(%ebx),%xmm2
|
||||
pmuludq %xmm7,%xmm1
|
||||
movdqa %xmm3,96(%esp)
|
||||
paddq (%esp),%xmm0
|
||||
movdqa 48(%ebx),%xmm3
|
||||
pmuludq %xmm7,%xmm2
|
||||
movq %xmm0,%xmm4
|
||||
pslldq $6,%xmm4
|
||||
paddq 16(%esp),%xmm1
|
||||
paddq %xmm0,%xmm4
|
||||
movdqa %xmm4,%xmm5
|
||||
psrldq $10,%xmm4
|
||||
movdqa 64(%ebx),%xmm0
|
||||
pmuludq %xmm7,%xmm3
|
||||
paddq %xmm4,%xmm1
|
||||
paddq 32(%esp),%xmm2
|
||||
movdqa %xmm1,(%esp)
|
||||
movdqa 80(%ebx),%xmm1
|
||||
pmuludq %xmm7,%xmm0
|
||||
paddq 48(%esp),%xmm3
|
||||
movdqa %xmm2,16(%esp)
|
||||
pand %xmm6,%xmm5
|
||||
movdqa 96(%ebx),%xmm2
|
||||
pmuludq %xmm7,%xmm1
|
||||
paddq %xmm5,%xmm3
|
||||
paddq 64(%esp),%xmm0
|
||||
movdqa %xmm3,32(%esp)
|
||||
pshufd $177,%xmm5,%xmm4
|
||||
movdqa %xmm7,%xmm3
|
||||
pmuludq %xmm7,%xmm2
|
||||
movd (%ebp),%xmm7
|
||||
leal 4(%ebp),%ebp
|
||||
paddq 80(%esp),%xmm1
|
||||
psubq %xmm5,%xmm4
|
||||
movdqa %xmm0,48(%esp)
|
||||
pshuflw $220,%xmm7,%xmm7
|
||||
pmuludq 112(%ebx),%xmm3
|
||||
pshufd $220,%xmm7,%xmm7
|
||||
movdqa (%ebx),%xmm0
|
||||
movdqa %xmm1,64(%esp)
|
||||
paddq 96(%esp),%xmm2
|
||||
decl %ecx
|
||||
jnz .L005madd_sse2
|
||||
paddq %xmm5,%xmm2
|
||||
paddq %xmm4,%xmm3
|
||||
movdqa 16(%ebx),%xmm1
|
||||
pmuludq %xmm7,%xmm0
|
||||
movdqa %xmm2,80(%esp)
|
||||
movdqa 32(%ebx),%xmm2
|
||||
pmuludq %xmm7,%xmm1
|
||||
movdqa %xmm3,96(%esp)
|
||||
paddq (%esp),%xmm0
|
||||
movdqa 48(%ebx),%xmm3
|
||||
pmuludq %xmm7,%xmm2
|
||||
movq %xmm0,%xmm4
|
||||
pslldq $6,%xmm4
|
||||
paddq 16(%esp),%xmm1
|
||||
paddq %xmm0,%xmm4
|
||||
movdqa %xmm4,%xmm5
|
||||
psrldq $10,%xmm4
|
||||
movdqa 64(%ebx),%xmm0
|
||||
pmuludq %xmm7,%xmm3
|
||||
paddq %xmm4,%xmm1
|
||||
paddq 32(%esp),%xmm2
|
||||
movdqa %xmm1,(%esp)
|
||||
movdqa 80(%ebx),%xmm1
|
||||
pmuludq %xmm7,%xmm0
|
||||
paddq 48(%esp),%xmm3
|
||||
movdqa %xmm2,16(%esp)
|
||||
pand %xmm6,%xmm5
|
||||
movdqa 96(%ebx),%xmm2
|
||||
pmuludq %xmm7,%xmm1
|
||||
paddq %xmm5,%xmm3
|
||||
paddq 64(%esp),%xmm0
|
||||
movdqa %xmm3,32(%esp)
|
||||
pshufd $177,%xmm5,%xmm4
|
||||
movdqa 112(%ebx),%xmm3
|
||||
pmuludq %xmm7,%xmm2
|
||||
paddq 80(%esp),%xmm1
|
||||
psubq %xmm5,%xmm4
|
||||
movdqa %xmm0,48(%esp)
|
||||
pmuludq %xmm7,%xmm3
|
||||
pcmpeqd %xmm7,%xmm7
|
||||
movdqa (%esp),%xmm0
|
||||
pslldq $8,%xmm7
|
||||
movdqa %xmm1,64(%esp)
|
||||
paddq 96(%esp),%xmm2
|
||||
paddq %xmm5,%xmm2
|
||||
paddq %xmm4,%xmm3
|
||||
movdqa %xmm2,80(%esp)
|
||||
movdqa %xmm3,96(%esp)
|
||||
movdqa 16(%esp),%xmm1
|
||||
movdqa 32(%esp),%xmm2
|
||||
movdqa 48(%esp),%xmm3
|
||||
movq %xmm0,%xmm4
|
||||
pand %xmm7,%xmm0
|
||||
xorl %ebp,%ebp
|
||||
pslldq $6,%xmm4
|
||||
movq %xmm1,%xmm5
|
||||
paddq %xmm4,%xmm0
|
||||
pand %xmm7,%xmm1
|
||||
psrldq $6,%xmm0
|
||||
movd %xmm0,%eax
|
||||
psrldq $4,%xmm0
|
||||
paddq %xmm0,%xmm5
|
||||
movdqa 64(%esp),%xmm0
|
||||
subl $-1,%eax
|
||||
pslldq $6,%xmm5
|
||||
movq %xmm2,%xmm4
|
||||
paddq %xmm5,%xmm1
|
||||
pand %xmm7,%xmm2
|
||||
psrldq $6,%xmm1
|
||||
movl %eax,(%edi)
|
||||
movd %xmm1,%eax
|
||||
psrldq $4,%xmm1
|
||||
paddq %xmm1,%xmm4
|
||||
movdqa 80(%esp),%xmm1
|
||||
sbbl $-1,%eax
|
||||
pslldq $6,%xmm4
|
||||
movq %xmm3,%xmm5
|
||||
paddq %xmm4,%xmm2
|
||||
pand %xmm7,%xmm3
|
||||
psrldq $6,%xmm2
|
||||
movl %eax,4(%edi)
|
||||
movd %xmm2,%eax
|
||||
psrldq $4,%xmm2
|
||||
paddq %xmm2,%xmm5
|
||||
movdqa 96(%esp),%xmm2
|
||||
sbbl $-1,%eax
|
||||
pslldq $6,%xmm5
|
||||
movq %xmm0,%xmm4
|
||||
paddq %xmm5,%xmm3
|
||||
pand %xmm7,%xmm0
|
||||
psrldq $6,%xmm3
|
||||
movl %eax,8(%edi)
|
||||
movd %xmm3,%eax
|
||||
psrldq $4,%xmm3
|
||||
paddq %xmm3,%xmm4
|
||||
sbbl $0,%eax
|
||||
pslldq $6,%xmm4
|
||||
movq %xmm1,%xmm5
|
||||
paddq %xmm4,%xmm0
|
||||
pand %xmm7,%xmm1
|
||||
psrldq $6,%xmm0
|
||||
movl %eax,12(%edi)
|
||||
movd %xmm0,%eax
|
||||
psrldq $4,%xmm0
|
||||
paddq %xmm0,%xmm5
|
||||
sbbl $0,%eax
|
||||
pslldq $6,%xmm5
|
||||
movq %xmm2,%xmm4
|
||||
paddq %xmm5,%xmm1
|
||||
pand %xmm7,%xmm2
|
||||
psrldq $6,%xmm1
|
||||
movd %xmm1,%ebx
|
||||
psrldq $4,%xmm1
|
||||
movl %edx,%esp
|
||||
paddq %xmm1,%xmm4
|
||||
pslldq $6,%xmm4
|
||||
paddq %xmm4,%xmm2
|
||||
psrldq $6,%xmm2
|
||||
movd %xmm2,%ecx
|
||||
psrldq $4,%xmm2
|
||||
sbbl $0,%ebx
|
||||
movd %xmm2,%edx
|
||||
pextrw $2,%xmm2,%esi
|
||||
sbbl $1,%ecx
|
||||
sbbl $-1,%edx
|
||||
sbbl $0,%esi
|
||||
subl %esi,%ebp
|
||||
addl %esi,(%edi)
|
||||
adcl %esi,4(%edi)
|
||||
adcl %esi,8(%edi)
|
||||
adcl $0,12(%edi)
|
||||
adcl $0,%eax
|
||||
adcl $0,%ebx
|
||||
movl %eax,16(%edi)
|
||||
adcl %ebp,%ecx
|
||||
movl %ebx,20(%edi)
|
||||
adcl %esi,%edx
|
||||
movl %ecx,24(%edi)
|
||||
movl %edx,28(%edi)
|
||||
ret
|
||||
.align 16
|
||||
.L004mul_mont_ialu:
|
||||
subl $40,%esp
|
||||
movl (%esi),%eax
|
||||
movl (%ebp),%ebx
|
||||
@ -8347,7 +8902,7 @@ ecp_nistz256_scatter_w5:
|
||||
movl 28(%esp),%ebp
|
||||
leal 124(%edi,%ebp,4),%edi
|
||||
movl $6,%ebp
|
||||
.L002scatter_w5_loop:
|
||||
.L006scatter_w5_loop:
|
||||
movl (%esi),%eax
|
||||
movl 4(%esi),%ebx
|
||||
movl 8(%esi),%ecx
|
||||
@ -8359,7 +8914,7 @@ ecp_nistz256_scatter_w5:
|
||||
movl %edx,64(%edi)
|
||||
leal 256(%edi),%edi
|
||||
decl %ebp
|
||||
jnz .L002scatter_w5_loop
|
||||
jnz .L006scatter_w5_loop
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
@ -8474,7 +9029,7 @@ ecp_nistz256_scatter_w7:
|
||||
movl 28(%esp),%ebp
|
||||
leal (%edi,%ebp,1),%edi
|
||||
movl $16,%ebp
|
||||
.L003scatter_w7_loop:
|
||||
.L007scatter_w7_loop:
|
||||
movl (%esi),%eax
|
||||
leal 4(%esi),%esi
|
||||
movb %al,(%edi)
|
||||
@ -8484,7 +9039,7 @@ ecp_nistz256_scatter_w7:
|
||||
movb %ah,192(%edi)
|
||||
leal 256(%edi),%edi
|
||||
decl %ebp
|
||||
jnz .L003scatter_w7_loop
|
||||
jnz .L007scatter_w7_loop
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
@ -8716,6 +9271,10 @@ ecp_nistz256_point_double:
|
||||
pushl %edi
|
||||
movl 24(%esp),%esi
|
||||
subl $164,%esp
|
||||
call _picup_eax
|
||||
.L008pic:
|
||||
leal OPENSSL_ia32cap_P,%edx
|
||||
movl (%edx),%ebp
|
||||
.Lpoint_double_shortcut:
|
||||
movl (%esi),%eax
|
||||
movl 4(%esi),%ebx
|
||||
@ -8838,6 +9397,10 @@ ecp_nistz256_point_add:
|
||||
pushl %edi
|
||||
movl 28(%esp),%esi
|
||||
subl $596,%esp
|
||||
call _picup_eax
|
||||
.L009pic:
|
||||
leal OPENSSL_ia32cap_P,%edx
|
||||
movl (%edx),%ebp
|
||||
leal 192(%esp),%edi
|
||||
movl (%esi),%eax
|
||||
movl 4(%esi),%ebx
|
||||
@ -9028,26 +9591,26 @@ ecp_nistz256_point_add:
|
||||
orl 8(%edi),%eax
|
||||
orl 12(%edi),%eax
|
||||
.byte 62
|
||||
jnz .L004add_proceed
|
||||
jnz .L010add_proceed
|
||||
movl 576(%esp),%eax
|
||||
andl 580(%esp),%eax
|
||||
movl 584(%esp),%ebx
|
||||
jz .L004add_proceed
|
||||
jz .L010add_proceed
|
||||
testl %ebx,%ebx
|
||||
jz .L005add_double
|
||||
jz .L011add_double
|
||||
movl 616(%esp),%edi
|
||||
xorl %eax,%eax
|
||||
movl $24,%ecx
|
||||
.byte 252,243,171
|
||||
jmp .L006add_done
|
||||
jmp .L012add_done
|
||||
.align 16
|
||||
.L005add_double:
|
||||
.L011add_double:
|
||||
movl 620(%esp),%esi
|
||||
movl 588(%esp),%ebp
|
||||
addl $432,%esp
|
||||
jmp .Lpoint_double_shortcut
|
||||
.align 16
|
||||
.L004add_proceed:
|
||||
.L010add_proceed:
|
||||
movl 588(%esp),%eax
|
||||
leal 352(%esp),%esi
|
||||
leal 352(%esp),%ebp
|
||||
@ -9332,7 +9895,7 @@ ecp_nistz256_point_add:
|
||||
orl %ebx,%eax
|
||||
orl %ecx,%eax
|
||||
movl %eax,60(%edi)
|
||||
.L006add_done:
|
||||
.L012add_done:
|
||||
addl $596,%esp
|
||||
popl %edi
|
||||
popl %esi
|
||||
@ -9351,6 +9914,10 @@ ecp_nistz256_point_add_affine:
|
||||
pushl %edi
|
||||
movl 24(%esp),%esi
|
||||
subl $492,%esp
|
||||
call _picup_eax
|
||||
.L013pic:
|
||||
leal OPENSSL_ia32cap_P,%edx
|
||||
movl (%edx),%ebp
|
||||
leal 96(%esp),%edi
|
||||
movl (%esi),%eax
|
||||
movl 4(%esi),%ebx
|
||||
@ -9768,4 +10335,5 @@ ecp_nistz256_point_add_affine:
|
||||
popl %ebp
|
||||
ret
|
||||
.size ecp_nistz256_point_add_affine,.-.L_ecp_nistz256_point_add_affine_begin
|
||||
.comm OPENSSL_ia32cap_P,16,4
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -250,6 +250,18 @@ bn_GF2m_mul_2x2:
|
||||
movl 4(%edx),%edx
|
||||
testl $8388608,%eax
|
||||
jz .L001ialu
|
||||
testl $16777216,%eax
|
||||
jz .L002mmx
|
||||
testl $2,%edx
|
||||
jz .L002mmx
|
||||
movups 8(%esp),%xmm0
|
||||
shufps $177,%xmm0,%xmm0
|
||||
.byte 102,15,58,68,192,1
|
||||
movl 4(%esp),%eax
|
||||
movups %xmm0,(%eax)
|
||||
ret
|
||||
.align 16
|
||||
.L002mmx:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
@ -581,6 +593,18 @@ bn_GF2m_mul_2x2:
|
||||
movl 4(%edx),%edx
|
||||
testl $8388608,%eax
|
||||
jz .L000ialu
|
||||
testl $16777216,%eax
|
||||
jz .L001mmx
|
||||
testl $2,%edx
|
||||
jz .L001mmx
|
||||
movups 8(%esp),%xmm0
|
||||
shufps $177,%xmm0,%xmm0
|
||||
.byte 102,15,58,68,192,1
|
||||
movl 4(%esp),%eax
|
||||
movups %xmm0,(%eax)
|
||||
ret
|
||||
.align 16
|
||||
.L001mmx:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
|
@ -59,6 +59,126 @@ bn_mul_mont:
|
||||
movl %esi,20(%esp)
|
||||
leal -3(%edi),%ebx
|
||||
movl %edx,24(%esp)
|
||||
call .L003PIC_me_up
|
||||
.L003PIC_me_up:
|
||||
popl %eax
|
||||
leal OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
|
||||
btl $26,(%eax)
|
||||
jnc .L004non_sse2
|
||||
movl $-1,%eax
|
||||
movd %eax,%mm7
|
||||
movl 8(%esp),%esi
|
||||
movl 12(%esp),%edi
|
||||
movl 16(%esp),%ebp
|
||||
xorl %edx,%edx
|
||||
xorl %ecx,%ecx
|
||||
movd (%edi),%mm4
|
||||
movd (%esi),%mm5
|
||||
movd (%ebp),%mm3
|
||||
pmuludq %mm4,%mm5
|
||||
movq %mm5,%mm2
|
||||
movq %mm5,%mm0
|
||||
pand %mm7,%mm0
|
||||
pmuludq 20(%esp),%mm5
|
||||
pmuludq %mm5,%mm3
|
||||
paddq %mm0,%mm3
|
||||
movd 4(%ebp),%mm1
|
||||
movd 4(%esi),%mm0
|
||||
psrlq $32,%mm2
|
||||
psrlq $32,%mm3
|
||||
incl %ecx
|
||||
.align 16
|
||||
.L0051st:
|
||||
pmuludq %mm4,%mm0
|
||||
pmuludq %mm5,%mm1
|
||||
paddq %mm0,%mm2
|
||||
paddq %mm1,%mm3
|
||||
movq %mm2,%mm0
|
||||
pand %mm7,%mm0
|
||||
movd 4(%ebp,%ecx,4),%mm1
|
||||
paddq %mm0,%mm3
|
||||
movd 4(%esi,%ecx,4),%mm0
|
||||
psrlq $32,%mm2
|
||||
movd %mm3,28(%esp,%ecx,4)
|
||||
psrlq $32,%mm3
|
||||
leal 1(%ecx),%ecx
|
||||
cmpl %ebx,%ecx
|
||||
jl .L0051st
|
||||
pmuludq %mm4,%mm0
|
||||
pmuludq %mm5,%mm1
|
||||
paddq %mm0,%mm2
|
||||
paddq %mm1,%mm3
|
||||
movq %mm2,%mm0
|
||||
pand %mm7,%mm0
|
||||
paddq %mm0,%mm3
|
||||
movd %mm3,28(%esp,%ecx,4)
|
||||
psrlq $32,%mm2
|
||||
psrlq $32,%mm3
|
||||
paddq %mm2,%mm3
|
||||
movq %mm3,32(%esp,%ebx,4)
|
||||
incl %edx
|
||||
.L006outer:
|
||||
xorl %ecx,%ecx
|
||||
movd (%edi,%edx,4),%mm4
|
||||
movd (%esi),%mm5
|
||||
movd 32(%esp),%mm6
|
||||
movd (%ebp),%mm3
|
||||
pmuludq %mm4,%mm5
|
||||
paddq %mm6,%mm5
|
||||
movq %mm5,%mm0
|
||||
movq %mm5,%mm2
|
||||
pand %mm7,%mm0
|
||||
pmuludq 20(%esp),%mm5
|
||||
pmuludq %mm5,%mm3
|
||||
paddq %mm0,%mm3
|
||||
movd 36(%esp),%mm6
|
||||
movd 4(%ebp),%mm1
|
||||
movd 4(%esi),%mm0
|
||||
psrlq $32,%mm2
|
||||
psrlq $32,%mm3
|
||||
paddq %mm6,%mm2
|
||||
incl %ecx
|
||||
decl %ebx
|
||||
.L007inner:
|
||||
pmuludq %mm4,%mm0
|
||||
pmuludq %mm5,%mm1
|
||||
paddq %mm0,%mm2
|
||||
paddq %mm1,%mm3
|
||||
movq %mm2,%mm0
|
||||
movd 36(%esp,%ecx,4),%mm6
|
||||
pand %mm7,%mm0
|
||||
movd 4(%ebp,%ecx,4),%mm1
|
||||
paddq %mm0,%mm3
|
||||
movd 4(%esi,%ecx,4),%mm0
|
||||
psrlq $32,%mm2
|
||||
movd %mm3,28(%esp,%ecx,4)
|
||||
psrlq $32,%mm3
|
||||
paddq %mm6,%mm2
|
||||
decl %ebx
|
||||
leal 1(%ecx),%ecx
|
||||
jnz .L007inner
|
||||
movl %ecx,%ebx
|
||||
pmuludq %mm4,%mm0
|
||||
pmuludq %mm5,%mm1
|
||||
paddq %mm0,%mm2
|
||||
paddq %mm1,%mm3
|
||||
movq %mm2,%mm0
|
||||
pand %mm7,%mm0
|
||||
paddq %mm0,%mm3
|
||||
movd %mm3,28(%esp,%ecx,4)
|
||||
psrlq $32,%mm2
|
||||
psrlq $32,%mm3
|
||||
movd 36(%esp,%ebx,4),%mm6
|
||||
paddq %mm2,%mm3
|
||||
paddq %mm6,%mm3
|
||||
movq %mm3,32(%esp,%ebx,4)
|
||||
leal 1(%edx),%edx
|
||||
cmpl %ebx,%edx
|
||||
jle .L006outer
|
||||
emms
|
||||
jmp .L008common_tail
|
||||
.align 16
|
||||
.L004non_sse2:
|
||||
movl 8(%esp),%esi
|
||||
leal 1(%ebx),%ebp
|
||||
movl 12(%esp),%edi
|
||||
@ -69,12 +189,12 @@ bn_mul_mont:
|
||||
leal 4(%edi,%ebx,4),%eax
|
||||
orl %edx,%ebp
|
||||
movl (%edi),%edi
|
||||
jz .L003bn_sqr_mont
|
||||
jz .L009bn_sqr_mont
|
||||
movl %eax,28(%esp)
|
||||
movl (%esi),%eax
|
||||
xorl %edx,%edx
|
||||
.align 16
|
||||
.L004mull:
|
||||
.L010mull:
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl %eax,%ebp
|
||||
@ -83,7 +203,7 @@ bn_mul_mont:
|
||||
movl (%esi,%ecx,4),%eax
|
||||
cmpl %ebx,%ecx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
jl .L004mull
|
||||
jl .L010mull
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
movl 20(%esp),%edi
|
||||
@ -101,9 +221,9 @@ bn_mul_mont:
|
||||
movl 4(%esi),%eax
|
||||
adcl $0,%edx
|
||||
incl %ecx
|
||||
jmp .L0052ndmadd
|
||||
jmp .L0112ndmadd
|
||||
.align 16
|
||||
.L0061stmadd:
|
||||
.L0121stmadd:
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ecx,4),%ebp
|
||||
@ -114,7 +234,7 @@ bn_mul_mont:
|
||||
adcl $0,%edx
|
||||
cmpl %ebx,%ecx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
jl .L0061stmadd
|
||||
jl .L0121stmadd
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ebx,4),%eax
|
||||
@ -137,7 +257,7 @@ bn_mul_mont:
|
||||
adcl $0,%edx
|
||||
movl $1,%ecx
|
||||
.align 16
|
||||
.L0052ndmadd:
|
||||
.L0112ndmadd:
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ecx,4),%ebp
|
||||
@ -148,7 +268,7 @@ bn_mul_mont:
|
||||
adcl $0,%edx
|
||||
cmpl %ebx,%ecx
|
||||
movl %ebp,24(%esp,%ecx,4)
|
||||
jl .L0052ndmadd
|
||||
jl .L0112ndmadd
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ebx,4),%ebp
|
||||
@ -164,16 +284,16 @@ bn_mul_mont:
|
||||
movl %edx,32(%esp,%ebx,4)
|
||||
cmpl 28(%esp),%ecx
|
||||
movl %eax,36(%esp,%ebx,4)
|
||||
je .L007common_tail
|
||||
je .L008common_tail
|
||||
movl (%ecx),%edi
|
||||
movl 8(%esp),%esi
|
||||
movl %ecx,12(%esp)
|
||||
xorl %ecx,%ecx
|
||||
xorl %edx,%edx
|
||||
movl (%esi),%eax
|
||||
jmp .L0061stmadd
|
||||
jmp .L0121stmadd
|
||||
.align 16
|
||||
.L003bn_sqr_mont:
|
||||
.L009bn_sqr_mont:
|
||||
movl %ebx,(%esp)
|
||||
movl %ecx,12(%esp)
|
||||
movl %edi,%eax
|
||||
@ -184,7 +304,7 @@ bn_mul_mont:
|
||||
andl $1,%ebx
|
||||
incl %ecx
|
||||
.align 16
|
||||
.L008sqr:
|
||||
.L013sqr:
|
||||
movl (%esi,%ecx,4),%eax
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
@ -196,7 +316,7 @@ bn_mul_mont:
|
||||
cmpl (%esp),%ecx
|
||||
movl %eax,%ebx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
jl .L008sqr
|
||||
jl .L013sqr
|
||||
movl (%esi,%ecx,4),%eax
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
@ -220,7 +340,7 @@ bn_mul_mont:
|
||||
movl 4(%esi),%eax
|
||||
movl $1,%ecx
|
||||
.align 16
|
||||
.L0093rdmadd:
|
||||
.L0143rdmadd:
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ecx,4),%ebp
|
||||
@ -239,7 +359,7 @@ bn_mul_mont:
|
||||
adcl $0,%edx
|
||||
cmpl %ebx,%ecx
|
||||
movl %ebp,24(%esp,%ecx,4)
|
||||
jl .L0093rdmadd
|
||||
jl .L0143rdmadd
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ebx,4),%ebp
|
||||
@ -255,7 +375,7 @@ bn_mul_mont:
|
||||
movl %edx,32(%esp,%ebx,4)
|
||||
cmpl %ebx,%ecx
|
||||
movl %eax,36(%esp,%ebx,4)
|
||||
je .L007common_tail
|
||||
je .L008common_tail
|
||||
movl 4(%esi,%ecx,4),%edi
|
||||
leal 1(%ecx),%ecx
|
||||
movl %edi,%eax
|
||||
@ -267,12 +387,12 @@ bn_mul_mont:
|
||||
xorl %ebp,%ebp
|
||||
cmpl %ebx,%ecx
|
||||
leal 1(%ecx),%ecx
|
||||
je .L010sqrlast
|
||||
je .L015sqrlast
|
||||
movl %edx,%ebx
|
||||
shrl $1,%edx
|
||||
andl $1,%ebx
|
||||
.align 16
|
||||
.L011sqradd:
|
||||
.L016sqradd:
|
||||
movl (%esi,%ecx,4),%eax
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
@ -288,13 +408,13 @@ bn_mul_mont:
|
||||
cmpl (%esp),%ecx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
movl %eax,%ebx
|
||||
jle .L011sqradd
|
||||
jle .L016sqradd
|
||||
movl %edx,%ebp
|
||||
addl %edx,%edx
|
||||
shrl $31,%ebp
|
||||
addl %ebx,%edx
|
||||
adcl $0,%ebp
|
||||
.L010sqrlast:
|
||||
.L015sqrlast:
|
||||
movl 20(%esp),%edi
|
||||
movl 16(%esp),%esi
|
||||
imull 32(%esp),%edi
|
||||
@ -309,9 +429,9 @@ bn_mul_mont:
|
||||
adcl $0,%edx
|
||||
movl $1,%ecx
|
||||
movl 4(%esi),%eax
|
||||
jmp .L0093rdmadd
|
||||
jmp .L0143rdmadd
|
||||
.align 16
|
||||
.L007common_tail:
|
||||
.L008common_tail:
|
||||
movl 16(%esp),%ebp
|
||||
movl 4(%esp),%edi
|
||||
leal 32(%esp),%esi
|
||||
@ -319,19 +439,19 @@ bn_mul_mont:
|
||||
movl %ebx,%ecx
|
||||
xorl %edx,%edx
|
||||
.align 16
|
||||
.L012sub:
|
||||
.L017sub:
|
||||
sbbl (%ebp,%edx,4),%eax
|
||||
movl %eax,(%edi,%edx,4)
|
||||
decl %ecx
|
||||
movl 4(%esi,%edx,4),%eax
|
||||
leal 1(%edx),%edx
|
||||
jge .L012sub
|
||||
jge .L017sub
|
||||
sbbl $0,%eax
|
||||
movl $-1,%edx
|
||||
xorl %eax,%edx
|
||||
jmp .L013copy
|
||||
jmp .L018copy
|
||||
.align 16
|
||||
.L013copy:
|
||||
.L018copy:
|
||||
movl 32(%esp,%ebx,4),%esi
|
||||
movl (%edi,%ebx,4),%ebp
|
||||
movl %ecx,32(%esp,%ebx,4)
|
||||
@ -340,7 +460,7 @@ bn_mul_mont:
|
||||
orl %esi,%ebp
|
||||
movl %ebp,(%edi,%ebx,4)
|
||||
decl %ebx
|
||||
jge .L013copy
|
||||
jge .L018copy
|
||||
movl 24(%esp),%esp
|
||||
movl $1,%eax
|
||||
.L000just_leave:
|
||||
@ -355,6 +475,7 @@ bn_mul_mont:
|
||||
.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
|
||||
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
|
||||
.byte 111,114,103,62,0
|
||||
.comm OPENSSL_ia32cap_P,16,4
|
||||
#else
|
||||
.text
|
||||
.globl bn_mul_mont
|
||||
@ -414,6 +535,123 @@ bn_mul_mont:
|
||||
movl %esi,20(%esp)
|
||||
leal -3(%edi),%ebx
|
||||
movl %edx,24(%esp)
|
||||
leal OPENSSL_ia32cap_P,%eax
|
||||
btl $26,(%eax)
|
||||
jnc .L003non_sse2
|
||||
movl $-1,%eax
|
||||
movd %eax,%mm7
|
||||
movl 8(%esp),%esi
|
||||
movl 12(%esp),%edi
|
||||
movl 16(%esp),%ebp
|
||||
xorl %edx,%edx
|
||||
xorl %ecx,%ecx
|
||||
movd (%edi),%mm4
|
||||
movd (%esi),%mm5
|
||||
movd (%ebp),%mm3
|
||||
pmuludq %mm4,%mm5
|
||||
movq %mm5,%mm2
|
||||
movq %mm5,%mm0
|
||||
pand %mm7,%mm0
|
||||
pmuludq 20(%esp),%mm5
|
||||
pmuludq %mm5,%mm3
|
||||
paddq %mm0,%mm3
|
||||
movd 4(%ebp),%mm1
|
||||
movd 4(%esi),%mm0
|
||||
psrlq $32,%mm2
|
||||
psrlq $32,%mm3
|
||||
incl %ecx
|
||||
.align 16
|
||||
.L0041st:
|
||||
pmuludq %mm4,%mm0
|
||||
pmuludq %mm5,%mm1
|
||||
paddq %mm0,%mm2
|
||||
paddq %mm1,%mm3
|
||||
movq %mm2,%mm0
|
||||
pand %mm7,%mm0
|
||||
movd 4(%ebp,%ecx,4),%mm1
|
||||
paddq %mm0,%mm3
|
||||
movd 4(%esi,%ecx,4),%mm0
|
||||
psrlq $32,%mm2
|
||||
movd %mm3,28(%esp,%ecx,4)
|
||||
psrlq $32,%mm3
|
||||
leal 1(%ecx),%ecx
|
||||
cmpl %ebx,%ecx
|
||||
jl .L0041st
|
||||
pmuludq %mm4,%mm0
|
||||
pmuludq %mm5,%mm1
|
||||
paddq %mm0,%mm2
|
||||
paddq %mm1,%mm3
|
||||
movq %mm2,%mm0
|
||||
pand %mm7,%mm0
|
||||
paddq %mm0,%mm3
|
||||
movd %mm3,28(%esp,%ecx,4)
|
||||
psrlq $32,%mm2
|
||||
psrlq $32,%mm3
|
||||
paddq %mm2,%mm3
|
||||
movq %mm3,32(%esp,%ebx,4)
|
||||
incl %edx
|
||||
.L005outer:
|
||||
xorl %ecx,%ecx
|
||||
movd (%edi,%edx,4),%mm4
|
||||
movd (%esi),%mm5
|
||||
movd 32(%esp),%mm6
|
||||
movd (%ebp),%mm3
|
||||
pmuludq %mm4,%mm5
|
||||
paddq %mm6,%mm5
|
||||
movq %mm5,%mm0
|
||||
movq %mm5,%mm2
|
||||
pand %mm7,%mm0
|
||||
pmuludq 20(%esp),%mm5
|
||||
pmuludq %mm5,%mm3
|
||||
paddq %mm0,%mm3
|
||||
movd 36(%esp),%mm6
|
||||
movd 4(%ebp),%mm1
|
||||
movd 4(%esi),%mm0
|
||||
psrlq $32,%mm2
|
||||
psrlq $32,%mm3
|
||||
paddq %mm6,%mm2
|
||||
incl %ecx
|
||||
decl %ebx
|
||||
.L006inner:
|
||||
pmuludq %mm4,%mm0
|
||||
pmuludq %mm5,%mm1
|
||||
paddq %mm0,%mm2
|
||||
paddq %mm1,%mm3
|
||||
movq %mm2,%mm0
|
||||
movd 36(%esp,%ecx,4),%mm6
|
||||
pand %mm7,%mm0
|
||||
movd 4(%ebp,%ecx,4),%mm1
|
||||
paddq %mm0,%mm3
|
||||
movd 4(%esi,%ecx,4),%mm0
|
||||
psrlq $32,%mm2
|
||||
movd %mm3,28(%esp,%ecx,4)
|
||||
psrlq $32,%mm3
|
||||
paddq %mm6,%mm2
|
||||
decl %ebx
|
||||
leal 1(%ecx),%ecx
|
||||
jnz .L006inner
|
||||
movl %ecx,%ebx
|
||||
pmuludq %mm4,%mm0
|
||||
pmuludq %mm5,%mm1
|
||||
paddq %mm0,%mm2
|
||||
paddq %mm1,%mm3
|
||||
movq %mm2,%mm0
|
||||
pand %mm7,%mm0
|
||||
paddq %mm0,%mm3
|
||||
movd %mm3,28(%esp,%ecx,4)
|
||||
psrlq $32,%mm2
|
||||
psrlq $32,%mm3
|
||||
movd 36(%esp,%ebx,4),%mm6
|
||||
paddq %mm2,%mm3
|
||||
paddq %mm6,%mm3
|
||||
movq %mm3,32(%esp,%ebx,4)
|
||||
leal 1(%edx),%edx
|
||||
cmpl %ebx,%edx
|
||||
jle .L005outer
|
||||
emms
|
||||
jmp .L007common_tail
|
||||
.align 16
|
||||
.L003non_sse2:
|
||||
movl 8(%esp),%esi
|
||||
leal 1(%ebx),%ebp
|
||||
movl 12(%esp),%edi
|
||||
@ -424,12 +662,12 @@ bn_mul_mont:
|
||||
leal 4(%edi,%ebx,4),%eax
|
||||
orl %edx,%ebp
|
||||
movl (%edi),%edi
|
||||
jz .L003bn_sqr_mont
|
||||
jz .L008bn_sqr_mont
|
||||
movl %eax,28(%esp)
|
||||
movl (%esi),%eax
|
||||
xorl %edx,%edx
|
||||
.align 16
|
||||
.L004mull:
|
||||
.L009mull:
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl %eax,%ebp
|
||||
@ -438,7 +676,7 @@ bn_mul_mont:
|
||||
movl (%esi,%ecx,4),%eax
|
||||
cmpl %ebx,%ecx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
jl .L004mull
|
||||
jl .L009mull
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
movl 20(%esp),%edi
|
||||
@ -456,9 +694,9 @@ bn_mul_mont:
|
||||
movl 4(%esi),%eax
|
||||
adcl $0,%edx
|
||||
incl %ecx
|
||||
jmp .L0052ndmadd
|
||||
jmp .L0102ndmadd
|
||||
.align 16
|
||||
.L0061stmadd:
|
||||
.L0111stmadd:
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ecx,4),%ebp
|
||||
@ -469,7 +707,7 @@ bn_mul_mont:
|
||||
adcl $0,%edx
|
||||
cmpl %ebx,%ecx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
jl .L0061stmadd
|
||||
jl .L0111stmadd
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ebx,4),%eax
|
||||
@ -492,7 +730,7 @@ bn_mul_mont:
|
||||
adcl $0,%edx
|
||||
movl $1,%ecx
|
||||
.align 16
|
||||
.L0052ndmadd:
|
||||
.L0102ndmadd:
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ecx,4),%ebp
|
||||
@ -503,7 +741,7 @@ bn_mul_mont:
|
||||
adcl $0,%edx
|
||||
cmpl %ebx,%ecx
|
||||
movl %ebp,24(%esp,%ecx,4)
|
||||
jl .L0052ndmadd
|
||||
jl .L0102ndmadd
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ebx,4),%ebp
|
||||
@ -526,9 +764,9 @@ bn_mul_mont:
|
||||
xorl %ecx,%ecx
|
||||
xorl %edx,%edx
|
||||
movl (%esi),%eax
|
||||
jmp .L0061stmadd
|
||||
jmp .L0111stmadd
|
||||
.align 16
|
||||
.L003bn_sqr_mont:
|
||||
.L008bn_sqr_mont:
|
||||
movl %ebx,(%esp)
|
||||
movl %ecx,12(%esp)
|
||||
movl %edi,%eax
|
||||
@ -539,7 +777,7 @@ bn_mul_mont:
|
||||
andl $1,%ebx
|
||||
incl %ecx
|
||||
.align 16
|
||||
.L008sqr:
|
||||
.L012sqr:
|
||||
movl (%esi,%ecx,4),%eax
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
@ -551,7 +789,7 @@ bn_mul_mont:
|
||||
cmpl (%esp),%ecx
|
||||
movl %eax,%ebx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
jl .L008sqr
|
||||
jl .L012sqr
|
||||
movl (%esi,%ecx,4),%eax
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
@ -575,7 +813,7 @@ bn_mul_mont:
|
||||
movl 4(%esi),%eax
|
||||
movl $1,%ecx
|
||||
.align 16
|
||||
.L0093rdmadd:
|
||||
.L0133rdmadd:
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ecx,4),%ebp
|
||||
@ -594,7 +832,7 @@ bn_mul_mont:
|
||||
adcl $0,%edx
|
||||
cmpl %ebx,%ecx
|
||||
movl %ebp,24(%esp,%ecx,4)
|
||||
jl .L0093rdmadd
|
||||
jl .L0133rdmadd
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ebx,4),%ebp
|
||||
@ -622,12 +860,12 @@ bn_mul_mont:
|
||||
xorl %ebp,%ebp
|
||||
cmpl %ebx,%ecx
|
||||
leal 1(%ecx),%ecx
|
||||
je .L010sqrlast
|
||||
je .L014sqrlast
|
||||
movl %edx,%ebx
|
||||
shrl $1,%edx
|
||||
andl $1,%ebx
|
||||
.align 16
|
||||
.L011sqradd:
|
||||
.L015sqradd:
|
||||
movl (%esi,%ecx,4),%eax
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
@ -643,13 +881,13 @@ bn_mul_mont:
|
||||
cmpl (%esp),%ecx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
movl %eax,%ebx
|
||||
jle .L011sqradd
|
||||
jle .L015sqradd
|
||||
movl %edx,%ebp
|
||||
addl %edx,%edx
|
||||
shrl $31,%ebp
|
||||
addl %ebx,%edx
|
||||
adcl $0,%ebp
|
||||
.L010sqrlast:
|
||||
.L014sqrlast:
|
||||
movl 20(%esp),%edi
|
||||
movl 16(%esp),%esi
|
||||
imull 32(%esp),%edi
|
||||
@ -664,7 +902,7 @@ bn_mul_mont:
|
||||
adcl $0,%edx
|
||||
movl $1,%ecx
|
||||
movl 4(%esi),%eax
|
||||
jmp .L0093rdmadd
|
||||
jmp .L0133rdmadd
|
||||
.align 16
|
||||
.L007common_tail:
|
||||
movl 16(%esp),%ebp
|
||||
@ -674,19 +912,19 @@ bn_mul_mont:
|
||||
movl %ebx,%ecx
|
||||
xorl %edx,%edx
|
||||
.align 16
|
||||
.L012sub:
|
||||
.L016sub:
|
||||
sbbl (%ebp,%edx,4),%eax
|
||||
movl %eax,(%edi,%edx,4)
|
||||
decl %ecx
|
||||
movl 4(%esi,%edx,4),%eax
|
||||
leal 1(%edx),%edx
|
||||
jge .L012sub
|
||||
jge .L016sub
|
||||
sbbl $0,%eax
|
||||
movl $-1,%edx
|
||||
xorl %eax,%edx
|
||||
jmp .L013copy
|
||||
jmp .L017copy
|
||||
.align 16
|
||||
.L013copy:
|
||||
.L017copy:
|
||||
movl 32(%esp,%ebx,4),%esi
|
||||
movl (%edi,%ebx,4),%ebp
|
||||
movl %ecx,32(%esp,%ebx,4)
|
||||
@ -695,7 +933,7 @@ bn_mul_mont:
|
||||
orl %esi,%ebp
|
||||
movl %ebp,(%edi,%ebx,4)
|
||||
decl %ebx
|
||||
jge .L013copy
|
||||
jge .L017copy
|
||||
movl 24(%esp),%esp
|
||||
movl $1,%eax
|
||||
.L000just_leave:
|
||||
@ -710,4 +948,5 @@ bn_mul_mont:
|
||||
.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
|
||||
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
|
||||
.byte 111,114,103,62,0
|
||||
.comm OPENSSL_ia32cap_P,16,4
|
||||
#endif
|
||||
|
@ -236,6 +236,18 @@ OPENSSL_wipe_cpu:
|
||||
movl (%ecx),%ecx
|
||||
btl $1,(%ecx)
|
||||
jnc .L016no_x87
|
||||
andl $83886080,%ecx
|
||||
cmpl $83886080,%ecx
|
||||
jne .L017no_sse2
|
||||
pxor %xmm0,%xmm0
|
||||
pxor %xmm1,%xmm1
|
||||
pxor %xmm2,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
pxor %xmm4,%xmm4
|
||||
pxor %xmm5,%xmm5
|
||||
pxor %xmm6,%xmm6
|
||||
pxor %xmm7,%xmm7
|
||||
.L017no_sse2:
|
||||
.long 4007259865,4007259865,4007259865,4007259865,2430851995
|
||||
.L016no_x87:
|
||||
leal 4(%esp),%eax
|
||||
@ -251,11 +263,11 @@ OPENSSL_atomic_add:
|
||||
pushl %ebx
|
||||
nop
|
||||
movl (%edx),%eax
|
||||
.L017spin:
|
||||
.L018spin:
|
||||
leal (%eax,%ecx,1),%ebx
|
||||
nop
|
||||
.long 447811568
|
||||
jne .L017spin
|
||||
jne .L018spin
|
||||
movl %ebx,%eax
|
||||
popl %ebx
|
||||
ret
|
||||
@ -269,32 +281,32 @@ OPENSSL_cleanse:
|
||||
movl 8(%esp),%ecx
|
||||
xorl %eax,%eax
|
||||
cmpl $7,%ecx
|
||||
jae .L018lot
|
||||
jae .L019lot
|
||||
cmpl $0,%ecx
|
||||
je .L019ret
|
||||
.L020little:
|
||||
je .L020ret
|
||||
.L021little:
|
||||
movb %al,(%edx)
|
||||
subl $1,%ecx
|
||||
leal 1(%edx),%edx
|
||||
jnz .L020little
|
||||
.L019ret:
|
||||
jnz .L021little
|
||||
.L020ret:
|
||||
ret
|
||||
.align 16
|
||||
.L018lot:
|
||||
.L019lot:
|
||||
testl $3,%edx
|
||||
jz .L021aligned
|
||||
jz .L022aligned
|
||||
movb %al,(%edx)
|
||||
leal -1(%ecx),%ecx
|
||||
leal 1(%edx),%edx
|
||||
jmp .L018lot
|
||||
.L021aligned:
|
||||
jmp .L019lot
|
||||
.L022aligned:
|
||||
movl %eax,(%edx)
|
||||
leal -4(%ecx),%ecx
|
||||
testl $-4,%ecx
|
||||
leal 4(%edx),%edx
|
||||
jnz .L021aligned
|
||||
jnz .L022aligned
|
||||
cmpl $0,%ecx
|
||||
jne .L020little
|
||||
jne .L021little
|
||||
ret
|
||||
.size OPENSSL_cleanse,.-.L_OPENSSL_cleanse_begin
|
||||
.globl CRYPTO_memcmp
|
||||
@ -310,18 +322,18 @@ CRYPTO_memcmp:
|
||||
xorl %eax,%eax
|
||||
xorl %edx,%edx
|
||||
cmpl $0,%ecx
|
||||
je .L022no_data
|
||||
.L023loop:
|
||||
je .L023no_data
|
||||
.L024loop:
|
||||
movb (%esi),%dl
|
||||
leal 1(%esi),%esi
|
||||
xorb (%edi),%dl
|
||||
leal 1(%edi),%edi
|
||||
orb %dl,%al
|
||||
decl %ecx
|
||||
jnz .L023loop
|
||||
jnz .L024loop
|
||||
negl %eax
|
||||
shrl $31,%eax
|
||||
.L022no_data:
|
||||
.L023no_data:
|
||||
popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
@ -336,6 +348,38 @@ OPENSSL_instrument_bus:
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl $0,%eax
|
||||
call .L025PIC_me_up
|
||||
.L025PIC_me_up:
|
||||
popl %edx
|
||||
leal OPENSSL_ia32cap_P-.L025PIC_me_up(%edx),%edx
|
||||
btl $4,(%edx)
|
||||
jnc .L026nogo
|
||||
btl $19,(%edx)
|
||||
jnc .L026nogo
|
||||
movl 20(%esp),%edi
|
||||
movl 24(%esp),%ecx
|
||||
.byte 0x0f,0x31
|
||||
movl %eax,%esi
|
||||
movl $0,%ebx
|
||||
clflush (%edi)
|
||||
.byte 240
|
||||
addl %ebx,(%edi)
|
||||
jmp .L027loop
|
||||
.align 16
|
||||
.L027loop:
|
||||
.byte 0x0f,0x31
|
||||
movl %eax,%edx
|
||||
subl %esi,%eax
|
||||
movl %edx,%esi
|
||||
movl %eax,%ebx
|
||||
clflush (%edi)
|
||||
.byte 240
|
||||
addl %eax,(%edi)
|
||||
leal 4(%edi),%edi
|
||||
subl $1,%ecx
|
||||
jnz .L027loop
|
||||
movl 24(%esp),%eax
|
||||
.L026nogo:
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
@ -352,6 +396,51 @@ OPENSSL_instrument_bus2:
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl $0,%eax
|
||||
call .L028PIC_me_up
|
||||
.L028PIC_me_up:
|
||||
popl %edx
|
||||
leal OPENSSL_ia32cap_P-.L028PIC_me_up(%edx),%edx
|
||||
btl $4,(%edx)
|
||||
jnc .L029nogo
|
||||
btl $19,(%edx)
|
||||
jnc .L029nogo
|
||||
movl 20(%esp),%edi
|
||||
movl 24(%esp),%ecx
|
||||
movl 28(%esp),%ebp
|
||||
.byte 0x0f,0x31
|
||||
movl %eax,%esi
|
||||
movl $0,%ebx
|
||||
clflush (%edi)
|
||||
.byte 240
|
||||
addl %ebx,(%edi)
|
||||
.byte 0x0f,0x31
|
||||
movl %eax,%edx
|
||||
subl %esi,%eax
|
||||
movl %edx,%esi
|
||||
movl %eax,%ebx
|
||||
jmp .L030loop2
|
||||
.align 16
|
||||
.L030loop2:
|
||||
clflush (%edi)
|
||||
.byte 240
|
||||
addl %eax,(%edi)
|
||||
subl $1,%ebp
|
||||
jz .L031done2
|
||||
.byte 0x0f,0x31
|
||||
movl %eax,%edx
|
||||
subl %esi,%eax
|
||||
movl %edx,%esi
|
||||
cmpl %ebx,%eax
|
||||
movl %eax,%ebx
|
||||
movl $0,%edx
|
||||
setne %dl
|
||||
subl %edx,%ecx
|
||||
leal (%edi,%edx,4),%edi
|
||||
jnz .L030loop2
|
||||
.L031done2:
|
||||
movl 24(%esp),%eax
|
||||
subl %ecx,%eax
|
||||
.L029nogo:
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
@ -369,33 +458,33 @@ OPENSSL_ia32_rdrand_bytes:
|
||||
movl 12(%esp),%edi
|
||||
movl 16(%esp),%ebx
|
||||
cmpl $0,%ebx
|
||||
je .L024done
|
||||
je .L032done
|
||||
movl $8,%ecx
|
||||
.L025loop:
|
||||
.L033loop:
|
||||
.byte 15,199,242
|
||||
jc .L026break
|
||||
loop .L025loop
|
||||
jmp .L024done
|
||||
jc .L034break
|
||||
loop .L033loop
|
||||
jmp .L032done
|
||||
.align 16
|
||||
.L026break:
|
||||
.L034break:
|
||||
cmpl $4,%ebx
|
||||
jb .L027tail
|
||||
jb .L035tail
|
||||
movl %edx,(%edi)
|
||||
leal 4(%edi),%edi
|
||||
addl $4,%eax
|
||||
subl $4,%ebx
|
||||
jz .L024done
|
||||
jz .L032done
|
||||
movl $8,%ecx
|
||||
jmp .L025loop
|
||||
jmp .L033loop
|
||||
.align 16
|
||||
.L027tail:
|
||||
.L035tail:
|
||||
movb %dl,(%edi)
|
||||
leal 1(%edi),%edi
|
||||
incl %eax
|
||||
shrl $8,%edx
|
||||
decl %ebx
|
||||
jnz .L027tail
|
||||
.L024done:
|
||||
jnz .L035tail
|
||||
.L032done:
|
||||
xorl %edx,%edx
|
||||
popl %ebx
|
||||
popl %edi
|
||||
@ -412,33 +501,33 @@ OPENSSL_ia32_rdseed_bytes:
|
||||
movl 12(%esp),%edi
|
||||
movl 16(%esp),%ebx
|
||||
cmpl $0,%ebx
|
||||
je .L028done
|
||||
je .L036done
|
||||
movl $8,%ecx
|
||||
.L029loop:
|
||||
.L037loop:
|
||||
.byte 15,199,250
|
||||
jc .L030break
|
||||
loop .L029loop
|
||||
jmp .L028done
|
||||
jc .L038break
|
||||
loop .L037loop
|
||||
jmp .L036done
|
||||
.align 16
|
||||
.L030break:
|
||||
.L038break:
|
||||
cmpl $4,%ebx
|
||||
jb .L031tail
|
||||
jb .L039tail
|
||||
movl %edx,(%edi)
|
||||
leal 4(%edi),%edi
|
||||
addl $4,%eax
|
||||
subl $4,%ebx
|
||||
jz .L028done
|
||||
jz .L036done
|
||||
movl $8,%ecx
|
||||
jmp .L029loop
|
||||
jmp .L037loop
|
||||
.align 16
|
||||
.L031tail:
|
||||
.L039tail:
|
||||
movb %dl,(%edi)
|
||||
leal 1(%edi),%edi
|
||||
incl %eax
|
||||
shrl $8,%edx
|
||||
decl %ebx
|
||||
jnz .L031tail
|
||||
.L028done:
|
||||
jnz .L039tail
|
||||
.L036done:
|
||||
xorl %edx,%edx
|
||||
popl %ebx
|
||||
popl %edi
|
||||
@ -676,6 +765,18 @@ OPENSSL_wipe_cpu:
|
||||
movl (%ecx),%ecx
|
||||
btl $1,(%ecx)
|
||||
jnc .L013no_x87
|
||||
andl $83886080,%ecx
|
||||
cmpl $83886080,%ecx
|
||||
jne .L014no_sse2
|
||||
pxor %xmm0,%xmm0
|
||||
pxor %xmm1,%xmm1
|
||||
pxor %xmm2,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
pxor %xmm4,%xmm4
|
||||
pxor %xmm5,%xmm5
|
||||
pxor %xmm6,%xmm6
|
||||
pxor %xmm7,%xmm7
|
||||
.L014no_sse2:
|
||||
.long 4007259865,4007259865,4007259865,4007259865,2430851995
|
||||
.L013no_x87:
|
||||
leal 4(%esp),%eax
|
||||
@ -691,11 +792,11 @@ OPENSSL_atomic_add:
|
||||
pushl %ebx
|
||||
nop
|
||||
movl (%edx),%eax
|
||||
.L014spin:
|
||||
.L015spin:
|
||||
leal (%eax,%ecx,1),%ebx
|
||||
nop
|
||||
.long 447811568
|
||||
jne .L014spin
|
||||
jne .L015spin
|
||||
movl %ebx,%eax
|
||||
popl %ebx
|
||||
ret
|
||||
@ -709,32 +810,32 @@ OPENSSL_cleanse:
|
||||
movl 8(%esp),%ecx
|
||||
xorl %eax,%eax
|
||||
cmpl $7,%ecx
|
||||
jae .L015lot
|
||||
jae .L016lot
|
||||
cmpl $0,%ecx
|
||||
je .L016ret
|
||||
.L017little:
|
||||
je .L017ret
|
||||
.L018little:
|
||||
movb %al,(%edx)
|
||||
subl $1,%ecx
|
||||
leal 1(%edx),%edx
|
||||
jnz .L017little
|
||||
.L016ret:
|
||||
jnz .L018little
|
||||
.L017ret:
|
||||
ret
|
||||
.align 16
|
||||
.L015lot:
|
||||
.L016lot:
|
||||
testl $3,%edx
|
||||
jz .L018aligned
|
||||
jz .L019aligned
|
||||
movb %al,(%edx)
|
||||
leal -1(%ecx),%ecx
|
||||
leal 1(%edx),%edx
|
||||
jmp .L015lot
|
||||
.L018aligned:
|
||||
jmp .L016lot
|
||||
.L019aligned:
|
||||
movl %eax,(%edx)
|
||||
leal -4(%ecx),%ecx
|
||||
testl $-4,%ecx
|
||||
leal 4(%edx),%edx
|
||||
jnz .L018aligned
|
||||
jnz .L019aligned
|
||||
cmpl $0,%ecx
|
||||
jne .L017little
|
||||
jne .L018little
|
||||
ret
|
||||
.size OPENSSL_cleanse,.-.L_OPENSSL_cleanse_begin
|
||||
.globl CRYPTO_memcmp
|
||||
@ -750,18 +851,18 @@ CRYPTO_memcmp:
|
||||
xorl %eax,%eax
|
||||
xorl %edx,%edx
|
||||
cmpl $0,%ecx
|
||||
je .L019no_data
|
||||
.L020loop:
|
||||
je .L020no_data
|
||||
.L021loop:
|
||||
movb (%esi),%dl
|
||||
leal 1(%esi),%esi
|
||||
xorb (%edi),%dl
|
||||
leal 1(%edi),%edi
|
||||
orb %dl,%al
|
||||
decl %ecx
|
||||
jnz .L020loop
|
||||
jnz .L021loop
|
||||
negl %eax
|
||||
shrl $31,%eax
|
||||
.L019no_data:
|
||||
.L020no_data:
|
||||
popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
@ -776,6 +877,35 @@ OPENSSL_instrument_bus:
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl $0,%eax
|
||||
leal OPENSSL_ia32cap_P,%edx
|
||||
btl $4,(%edx)
|
||||
jnc .L022nogo
|
||||
btl $19,(%edx)
|
||||
jnc .L022nogo
|
||||
movl 20(%esp),%edi
|
||||
movl 24(%esp),%ecx
|
||||
.byte 0x0f,0x31
|
||||
movl %eax,%esi
|
||||
movl $0,%ebx
|
||||
clflush (%edi)
|
||||
.byte 240
|
||||
addl %ebx,(%edi)
|
||||
jmp .L023loop
|
||||
.align 16
|
||||
.L023loop:
|
||||
.byte 0x0f,0x31
|
||||
movl %eax,%edx
|
||||
subl %esi,%eax
|
||||
movl %edx,%esi
|
||||
movl %eax,%ebx
|
||||
clflush (%edi)
|
||||
.byte 240
|
||||
addl %eax,(%edi)
|
||||
leal 4(%edi),%edi
|
||||
subl $1,%ecx
|
||||
jnz .L023loop
|
||||
movl 24(%esp),%eax
|
||||
.L022nogo:
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
@ -792,6 +922,48 @@ OPENSSL_instrument_bus2:
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl $0,%eax
|
||||
leal OPENSSL_ia32cap_P,%edx
|
||||
btl $4,(%edx)
|
||||
jnc .L024nogo
|
||||
btl $19,(%edx)
|
||||
jnc .L024nogo
|
||||
movl 20(%esp),%edi
|
||||
movl 24(%esp),%ecx
|
||||
movl 28(%esp),%ebp
|
||||
.byte 0x0f,0x31
|
||||
movl %eax,%esi
|
||||
movl $0,%ebx
|
||||
clflush (%edi)
|
||||
.byte 240
|
||||
addl %ebx,(%edi)
|
||||
.byte 0x0f,0x31
|
||||
movl %eax,%edx
|
||||
subl %esi,%eax
|
||||
movl %edx,%esi
|
||||
movl %eax,%ebx
|
||||
jmp .L025loop2
|
||||
.align 16
|
||||
.L025loop2:
|
||||
clflush (%edi)
|
||||
.byte 240
|
||||
addl %eax,(%edi)
|
||||
subl $1,%ebp
|
||||
jz .L026done2
|
||||
.byte 0x0f,0x31
|
||||
movl %eax,%edx
|
||||
subl %esi,%eax
|
||||
movl %edx,%esi
|
||||
cmpl %ebx,%eax
|
||||
movl %eax,%ebx
|
||||
movl $0,%edx
|
||||
setne %dl
|
||||
subl %edx,%ecx
|
||||
leal (%edi,%edx,4),%edi
|
||||
jnz .L025loop2
|
||||
.L026done2:
|
||||
movl 24(%esp),%eax
|
||||
subl %ecx,%eax
|
||||
.L024nogo:
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
@ -809,33 +981,33 @@ OPENSSL_ia32_rdrand_bytes:
|
||||
movl 12(%esp),%edi
|
||||
movl 16(%esp),%ebx
|
||||
cmpl $0,%ebx
|
||||
je .L021done
|
||||
je .L027done
|
||||
movl $8,%ecx
|
||||
.L022loop:
|
||||
.L028loop:
|
||||
.byte 15,199,242
|
||||
jc .L023break
|
||||
loop .L022loop
|
||||
jmp .L021done
|
||||
jc .L029break
|
||||
loop .L028loop
|
||||
jmp .L027done
|
||||
.align 16
|
||||
.L023break:
|
||||
.L029break:
|
||||
cmpl $4,%ebx
|
||||
jb .L024tail
|
||||
jb .L030tail
|
||||
movl %edx,(%edi)
|
||||
leal 4(%edi),%edi
|
||||
addl $4,%eax
|
||||
subl $4,%ebx
|
||||
jz .L021done
|
||||
jz .L027done
|
||||
movl $8,%ecx
|
||||
jmp .L022loop
|
||||
jmp .L028loop
|
||||
.align 16
|
||||
.L024tail:
|
||||
.L030tail:
|
||||
movb %dl,(%edi)
|
||||
leal 1(%edi),%edi
|
||||
incl %eax
|
||||
shrl $8,%edx
|
||||
decl %ebx
|
||||
jnz .L024tail
|
||||
.L021done:
|
||||
jnz .L030tail
|
||||
.L027done:
|
||||
xorl %edx,%edx
|
||||
popl %ebx
|
||||
popl %edi
|
||||
@ -852,33 +1024,33 @@ OPENSSL_ia32_rdseed_bytes:
|
||||
movl 12(%esp),%edi
|
||||
movl 16(%esp),%ebx
|
||||
cmpl $0,%ebx
|
||||
je .L025done
|
||||
je .L031done
|
||||
movl $8,%ecx
|
||||
.L026loop:
|
||||
.L032loop:
|
||||
.byte 15,199,250
|
||||
jc .L027break
|
||||
loop .L026loop
|
||||
jmp .L025done
|
||||
jc .L033break
|
||||
loop .L032loop
|
||||
jmp .L031done
|
||||
.align 16
|
||||
.L027break:
|
||||
.L033break:
|
||||
cmpl $4,%ebx
|
||||
jb .L028tail
|
||||
jb .L034tail
|
||||
movl %edx,(%edi)
|
||||
leal 4(%edi),%edi
|
||||
addl $4,%eax
|
||||
subl $4,%ebx
|
||||
jz .L025done
|
||||
jz .L031done
|
||||
movl $8,%ecx
|
||||
jmp .L026loop
|
||||
jmp .L032loop
|
||||
.align 16
|
||||
.L028tail:
|
||||
.L034tail:
|
||||
movb %dl,(%edi)
|
||||
leal 1(%edi),%edi
|
||||
incl %eax
|
||||
shrl $8,%edx
|
||||
decl %ebx
|
||||
jnz .L028tail
|
||||
.L025done:
|
||||
jnz .L034tail
|
||||
.L031done:
|
||||
xorl %edx,%edx
|
||||
popl %ebx
|
||||
popl %edi
|
||||
|
Loading…
x
Reference in New Issue
Block a user