Regen cpuid assembly files for aarch64 and arm.

This commit is contained in:
Jung-uk Kim 2018-09-22 03:54:40 +00:00
parent ea19bcde21
commit 61fab32360
3 changed files with 405 additions and 2 deletions

View File

@ -19,8 +19,11 @@
PERLPATH= -I${LCRYPTO_SRC}/crypto/perlasm
# cpuid
SRCS= arm64cpuid.pl
# aes
SRCS= aesv8-armx.pl vpaes-armv8.pl
SRCS+= aesv8-armx.pl vpaes-armv8.pl
# bn
SRCS+= armv8-mont.pl
@ -157,8 +160,11 @@ ${s}.S: ${s}.s
PERLPATH= -I${LCRYPTO_SRC}/crypto/perlasm
# cpuid
SRCS= armv4cpuid.pl
# aes
SRCS= aes-armv4.pl aesv8-armx.pl bsaes-armv7.pl
SRCS+= aes-armv4.pl aesv8-armx.pl bsaes-armv7.pl
# bn
SRCS+= armv4-mont.pl armv4-gf2m.pl

View File

@ -0,0 +1,124 @@
/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from arm64cpuid.pl. */
#include "arm_arch.h"
.text
.arch armv8-a+crypto
.align 5
.globl _armv7_neon_probe
.type _armv7_neon_probe,%function
_armv7_neon_probe:
orr v15.16b, v15.16b, v15.16b
ret
.size _armv7_neon_probe,.-_armv7_neon_probe
.globl _armv7_tick
.type _armv7_tick,%function
_armv7_tick:
#ifdef __APPLE__
mrs x0, CNTPCT_EL0
#else
mrs x0, CNTVCT_EL0
#endif
ret
.size _armv7_tick,.-_armv7_tick
.globl _armv8_aes_probe
.type _armv8_aes_probe,%function
_armv8_aes_probe:
aese v0.16b, v0.16b
ret
.size _armv8_aes_probe,.-_armv8_aes_probe
.globl _armv8_sha1_probe
.type _armv8_sha1_probe,%function
_armv8_sha1_probe:
sha1h s0, s0
ret
.size _armv8_sha1_probe,.-_armv8_sha1_probe
.globl _armv8_sha256_probe
.type _armv8_sha256_probe,%function
_armv8_sha256_probe:
sha256su0 v0.4s, v0.4s
ret
.size _armv8_sha256_probe,.-_armv8_sha256_probe
.globl _armv8_pmull_probe
.type _armv8_pmull_probe,%function
_armv8_pmull_probe:
pmull v0.1q, v0.1d, v0.1d
ret
.size _armv8_pmull_probe,.-_armv8_pmull_probe
.globl _armv8_sha512_probe
.type _armv8_sha512_probe,%function
_armv8_sha512_probe:
.long 0xcec08000 // sha512su0 v0.2d,v0.2d
ret
.size _armv8_sha512_probe,.-_armv8_sha512_probe
.globl OPENSSL_cleanse
.type OPENSSL_cleanse,%function
.align 5
OPENSSL_cleanse:
cbz x1,.Lret // len==0?
cmp x1,#15
b.hi .Lot // len>15
nop
.Little:
strb wzr,[x0],#1 // store byte-by-byte
subs x1,x1,#1
b.ne .Little
.Lret: ret
.align 4
.Lot: tst x0,#7
b.eq .Laligned // inp is aligned
strb wzr,[x0],#1 // store byte-by-byte
sub x1,x1,#1
b .Lot
.align 4
.Laligned:
str xzr,[x0],#8 // store word-by-word
sub x1,x1,#8
tst x1,#-8
b.ne .Laligned // len>=8
cbnz x1,.Little // len!=0?
ret
.size OPENSSL_cleanse,.-OPENSSL_cleanse
.globl CRYPTO_memcmp
.type CRYPTO_memcmp,%function
.align 4
CRYPTO_memcmp:
eor w3,w3,w3
cbz x2,.Lno_data // len==0?
cmp x2,#16
b.ne .Loop_cmp
ldp x8,x9,[x0]
ldp x10,x11,[x1]
eor x8,x8,x10
eor x9,x9,x11
orr x8,x8,x9
mov x0,#1
cmp x8,#0
csel x0,xzr,x0,eq
ret
.align 4
.Loop_cmp:
ldrb w4,[x0],#1
ldrb w5,[x1],#1
eor w4,w4,w5
orr w3,w3,w4
subs x2,x2,#1
b.ne .Loop_cmp
.Lno_data:
neg w0,w3
lsr w0,w0,#31
ret
.size CRYPTO_memcmp,.-CRYPTO_memcmp

View File

@ -0,0 +1,273 @@
/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from armv4cpuid.pl. */
#include "arm_arch.h"
.text
#if defined(__thumb2__) && !defined(__APPLE__)
.syntax unified
.thumb
#else
.code 32
#undef __thumb2__
#endif
.align 5
.globl OPENSSL_atomic_add
.type OPENSSL_atomic_add,%function
OPENSSL_atomic_add:
#if __ARM_ARCH__>=6
.Ladd: ldrex r2,[r0]
add r3,r2,r1
strex r2,r3,[r0]
cmp r2,#0
bne .Ladd
mov r0,r3
bx lr
#else
stmdb sp!,{r4,r5,r6,lr}
ldr r2,.Lspinlock
adr r3,.Lspinlock
mov r4,r0
mov r5,r1
add r6,r3,r2 @ &spinlock
b .+8
.Lspin: bl sched_yield
mov r0,#-1
swp r0,r0,[r6]
cmp r0,#0
bne .Lspin
ldr r2,[r4]
add r2,r2,r5
str r2,[r4]
str r0,[r6] @ release spinlock
ldmia sp!,{r4,r5,r6,lr}
tst lr,#1
moveq pc,lr
.word 0xe12fff1e @ bx lr
#endif
.size OPENSSL_atomic_add,.-OPENSSL_atomic_add
.globl OPENSSL_cleanse
.type OPENSSL_cleanse,%function
OPENSSL_cleanse:
eor ip,ip,ip
cmp r1,#7
#ifdef __thumb2__
itt hs
#endif
subhs r1,r1,#4
bhs .Lot
cmp r1,#0
beq .Lcleanse_done
.Little:
strb ip,[r0],#1
subs r1,r1,#1
bhi .Little
b .Lcleanse_done
.Lot: tst r0,#3
beq .Laligned
strb ip,[r0],#1
sub r1,r1,#1
b .Lot
.Laligned:
str ip,[r0],#4
subs r1,r1,#4
bhs .Laligned
adds r1,r1,#4
bne .Little
.Lcleanse_done:
#if __ARM_ARCH__>=5
bx lr
#else
tst lr,#1
moveq pc,lr
.word 0xe12fff1e @ bx lr
#endif
.size OPENSSL_cleanse,.-OPENSSL_cleanse
.globl CRYPTO_memcmp
.type CRYPTO_memcmp,%function
.align 4
CRYPTO_memcmp:
eor ip,ip,ip
cmp r2,#0
beq .Lno_data
stmdb sp!,{r4,r5}
.Loop_cmp:
ldrb r4,[r0],#1
ldrb r5,[r1],#1
eor r4,r4,r5
orr ip,ip,r4
subs r2,r2,#1
bne .Loop_cmp
ldmia sp!,{r4,r5}
.Lno_data:
rsb r0,ip,#0
mov r0,r0,lsr#31
#if __ARM_ARCH__>=5
bx lr
#else
tst lr,#1
moveq pc,lr
.word 0xe12fff1e @ bx lr
#endif
.size CRYPTO_memcmp,.-CRYPTO_memcmp
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.align 5
.globl _armv7_neon_probe
.type _armv7_neon_probe,%function
_armv7_neon_probe:
vorr q0,q0,q0
bx lr
.size _armv7_neon_probe,.-_armv7_neon_probe
.globl _armv7_tick
.type _armv7_tick,%function
_armv7_tick:
#ifdef __APPLE__
mrrc p15,0,r0,r1,c14 @ CNTPCT
#else
mrrc p15,1,r0,r1,c14 @ CNTVCT
#endif
bx lr
.size _armv7_tick,.-_armv7_tick
.globl _armv8_aes_probe
.type _armv8_aes_probe,%function
_armv8_aes_probe:
#if defined(__thumb2__) && !defined(__APPLE__)
.byte 0xb0,0xff,0x00,0x03 @ aese.8 q0,q0
#else
.byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0
#endif
bx lr
.size _armv8_aes_probe,.-_armv8_aes_probe
.globl _armv8_sha1_probe
.type _armv8_sha1_probe,%function
_armv8_sha1_probe:
#if defined(__thumb2__) && !defined(__APPLE__)
.byte 0x00,0xef,0x40,0x0c @ sha1c.32 q0,q0,q0
#else
.byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0
#endif
bx lr
.size _armv8_sha1_probe,.-_armv8_sha1_probe
.globl _armv8_sha256_probe
.type _armv8_sha256_probe,%function
_armv8_sha256_probe:
#if defined(__thumb2__) && !defined(__APPLE__)
.byte 0x00,0xff,0x40,0x0c @ sha256h.32 q0,q0,q0
#else
.byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0
#endif
bx lr
.size _armv8_sha256_probe,.-_armv8_sha256_probe
.globl _armv8_pmull_probe
.type _armv8_pmull_probe,%function
_armv8_pmull_probe:
#if defined(__thumb2__) && !defined(__APPLE__)
.byte 0xa0,0xef,0x00,0x0e @ vmull.p64 q0,d0,d0
#else
.byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0
#endif
bx lr
.size _armv8_pmull_probe,.-_armv8_pmull_probe
#endif
.globl OPENSSL_wipe_cpu
.type OPENSSL_wipe_cpu,%function
OPENSSL_wipe_cpu:
#if __ARM_MAX_ARCH__>=7
ldr r0,.LOPENSSL_armcap
adr r1,.LOPENSSL_armcap
ldr r0,[r1,r0]
#ifdef __APPLE__
ldr r0,[r0]
#endif
#endif
eor r2,r2,r2
eor r3,r3,r3
eor ip,ip,ip
#if __ARM_MAX_ARCH__>=7
tst r0,#1
beq .Lwipe_done
veor q0, q0, q0
veor q1, q1, q1
veor q2, q2, q2
veor q3, q3, q3
veor q8, q8, q8
veor q9, q9, q9
veor q10, q10, q10
veor q11, q11, q11
veor q12, q12, q12
veor q13, q13, q13
veor q14, q14, q14
veor q15, q15, q15
.Lwipe_done:
#endif
mov r0,sp
#if __ARM_ARCH__>=5
bx lr
#else
tst lr,#1
moveq pc,lr
.word 0xe12fff1e @ bx lr
#endif
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
.globl OPENSSL_instrument_bus
.type OPENSSL_instrument_bus,%function
OPENSSL_instrument_bus:
eor r0,r0,r0
#if __ARM_ARCH__>=5
bx lr
#else
tst lr,#1
moveq pc,lr
.word 0xe12fff1e @ bx lr
#endif
.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
.globl OPENSSL_instrument_bus2
.type OPENSSL_instrument_bus2,%function
OPENSSL_instrument_bus2:
eor r0,r0,r0
#if __ARM_ARCH__>=5
bx lr
#else
tst lr,#1
moveq pc,lr
.word 0xe12fff1e @ bx lr
#endif
.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
.align 5
#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-.
#endif
#if __ARM_ARCH__>=6
.align 5
#else
.Lspinlock:
.word atomic_add_spinlock-.Lspinlock
.align 5
.data
.align 2
atomic_add_spinlock:
.word 0
#endif
.comm OPENSSL_armcap_P,4,4
.hidden OPENSSL_armcap_P