Rename skein_block_asm.s to .S and assemble using Clang IAS

Comparing the object files produced by GNU as 2.17.50 and Clang IAS
shows many immaterial changes in strtab etc., and one material change
in .text:

   1bac:  4c 8b 4f 18             mov    0x18(%rdi),%r9
   1bb0:  eb 0e                   jmp    1bc0 <Skein1024_block_loop>
-  1bb2:  66 66 2e 0f 1f 84 00    data16 nopw %cs:0x0(%rax,%rax,1)
-  1bb9:  00 00 00 00
-  1bbd:  0f 1f 00                nopl   (%rax)
+  1bb2:  66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
+  1bb9:  00 00 00
+  1bbc:  0f 1f 40 00             nopl   0x0(%rax)

 0000000000001bc0 <Skein1024_block_loop>:
 Skein1024_block_loop():
   1bc0:  4c 8b 47 10             mov    0x10(%rdi),%r8
   1bc4:  4c 03 85 c0 00 00 00    add    0xc0(%rbp),%r8

That is, GNU as and Clang's integrated assembler use different multi-byte
NOPs for alignment (GNU as emits an 11-byte NOP + a 3-byte NOP, while
Clang IAS emits a 10-byte NOP + a 4-byte NOP).

Dependency cleanup hacks are not required, because we do not create
.depend files from GNU as.

Reviewed by:	allanjude, arichardson, cem, tsoome
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D8434
This commit is contained in:
emaste 2020-06-06 00:35:41 +00:00
parent 98a0e1a9cd
commit 0b1e77734c
3 changed files with 7 additions and 10 deletions

View File

@ -116,18 +116,15 @@ CFLAGS+= -DSHA1_ASM
SRCS+= rmd160.S
CFLAGS+= -DRMD160_ASM
.endif
.if exists(${MACHINE_ARCH}/skein_block_asm.s)
.if defined(XAS) || ${MK_BINUTILS_BOOTSTRAP} != "no"
AFLAGS += --strip-local-absolute
.if exists(${MACHINE_ARCH}/skein_block_asm.S)
# Fully unroll all loops in the assembly optimized version
AFLAGS+= --defsym SKEIN_LOOP=0 --defsym SKEIN_USE_ASM=1792
SRCS+= skein_block_asm.s
ACFLAGS+= -DSKEIN_LOOP=0
SRCS+= skein_block_asm.S
CFLAGS+= -DSKEIN_ASM -DSKEIN_USE_ASM=1792 # list of block functions to replace with assembly: 256+512+1024 = 1792
.else
.warning as not available: not using optimized Skein asm
.endif
.endif
.if exists(${MACHINE_ARCH}/sha.S) || exists(${MACHINE_ARCH}/rmd160.S) || exists(${MACHINE_ARCH}/skein_block_asm.s)
.if exists(${MACHINE_ARCH}/sha.S) || exists(${MACHINE_ARCH}/rmd160.S) || exists(${MACHINE_ARCH}/skein_block_asm.S)
ACFLAGS+= -DELF -Wa,--noexecstack
.endif
.endif # ${USE_ASM_SOURCES} != 0

View File

@ -28,13 +28,13 @@ SRCS += sha1.c sha256c.c sha512c.c
SRCS += skein.c skein_block.c
# unroll the 256 and 512 loops, half unroll the 1024
CFLAGS.skein_block.c += -DSKEIN_LOOP=995
.if exists(${MACHINE_ARCH}/skein_block_asm.s)
.if exists(${MACHINE_ARCH}/skein_block_asm.S)
.PATH: ${SRCTOP}/sys/crypto/skein/${MACHINE_ARCH}
SRCS += skein_block_asm.s
SRCS += skein_block_asm.S
CFLAGS += -DSKEIN_ASM -DSKEIN_USE_ASM=1792 # list of block functions to replace with assembly: 256+512+1024 = 1792
ACFLAGS += -DELF -Wa,--noexecstack
# Fully unroll all loops in the assembly optimized version
AFLAGS+= --defsym SKEIN_LOOP=0 --defsym SKEIN_USE_ASM=1792
ACFLAGS += -DSKEIN_LOOP=0
.endif
SRCS += siphash.c
SRCS += gmac.c gfmult.c