Increase loop unrolling for skein hashes
This patch was inspired by an opposite change made to shrink the code for the boot loader.

On my i7-4770, it increases the skein1024 speed from 470 to 550 MB/s.

Reviewed by: sbruno
MFC after: 1 month
Sponsored by: ScaleEngine Inc.
Differential Revision: https://reviews.freebsd.org/D7824
This commit is contained in:
parent
57c61ab190
commit
162ef49ab1
@ -88,6 +88,8 @@ sys/md5.h: ${SRCTOP}/sys/${.TARGET} .NOMETA
|
|||||||
CFLAGS+= -I${.CURDIR} -I${SRCTOP}/sys/crypto/sha2
|
CFLAGS+= -I${.CURDIR} -I${SRCTOP}/sys/crypto/sha2
|
||||||
CFLAGS+= -I${SRCTOP}/sys/crypto/skein
|
CFLAGS+= -I${SRCTOP}/sys/crypto/skein
|
||||||
CFLAGS+= -DWEAK_REFS
|
CFLAGS+= -DWEAK_REFS
|
||||||
|
# unroll the 256 and 512 loops, half unroll the 1024
|
||||||
|
CFLAGS+= -DSKEIN_LOOP=995
|
||||||
.PATH: ${.CURDIR}/${MACHINE_ARCH} ${SRCTOP}/sys/crypto/sha2
|
.PATH: ${.CURDIR}/${MACHINE_ARCH} ${SRCTOP}/sys/crypto/sha2
|
||||||
.PATH: ${SRCTOP}/sys/crypto/skein ${SRCTOP}/sys/crypto/skein/${MACHINE_ARCH}
|
.PATH: ${SRCTOP}/sys/crypto/skein ${SRCTOP}/sys/crypto/skein/${MACHINE_ARCH}
|
||||||
|
|
||||||
@ -101,6 +103,8 @@ CFLAGS+= -DRMD160_ASM
|
|||||||
.endif
|
.endif
|
||||||
.if exists(${MACHINE_ARCH}/skein_block_asm.s)
|
.if exists(${MACHINE_ARCH}/skein_block_asm.s)
|
||||||
AFLAGS += --strip-local-absolute
|
AFLAGS += --strip-local-absolute
|
||||||
|
# Fully unroll all loops in the assembly optimized version
|
||||||
|
AFLAGS+= --defsym SKEIN_LOOP=0
|
||||||
SRCS+= skein_block_asm.s
|
SRCS+= skein_block_asm.s
|
||||||
CFLAGS+= -DSKEIN_ASM -DSKEIN_USE_ASM=1792 # list of block functions to replace with assembly: 256+512+1024 = 1792
|
CFLAGS+= -DSKEIN_ASM -DSKEIN_USE_ASM=1792 # list of block functions to replace with assembly: 256+512+1024 = 1792
|
||||||
.endif
|
.endif
|
||||||
|
@ -19,11 +19,15 @@ SRCS += camellia.c camellia-api.c
|
|||||||
SRCS += des_ecb.c des_enc.c des_setkey.c
|
SRCS += des_ecb.c des_enc.c des_setkey.c
|
||||||
SRCS += sha1.c sha256c.c sha512c.c
|
SRCS += sha1.c sha256c.c sha512c.c
|
||||||
SRCS += skein.c skein_block.c
|
SRCS += skein.c skein_block.c
|
||||||
|
# unroll the 256 and 512 loops, half unroll the 1024
|
||||||
|
CFLAGS+= -DSKEIN_LOOP=995
|
||||||
.if exists(${MACHINE_ARCH}/skein_block_asm.s)
|
.if exists(${MACHINE_ARCH}/skein_block_asm.s)
|
||||||
.PATH: ${SRCTOP}/sys/crypto/skein/${MACHINE_ARCH}
|
.PATH: ${SRCTOP}/sys/crypto/skein/${MACHINE_ARCH}
|
||||||
SRCS += skein_block_asm.s
|
SRCS += skein_block_asm.s
|
||||||
CFLAGS += -DSKEIN_ASM -DSKEIN_USE_ASM=1792 # list of block functions to replace with assembly: 256+512+1024 = 1792
|
CFLAGS += -DSKEIN_ASM -DSKEIN_USE_ASM=1792 # list of block functions to replace with assembly: 256+512+1024 = 1792
|
||||||
ACFLAGS += -DELF -Wa,--noexecstack
|
ACFLAGS += -DELF -Wa,--noexecstack
|
||||||
|
# Fully unroll all loops in the assembly optimized version
|
||||||
|
AFLAGS+= --defsym SKEIN_LOOP=0
|
||||||
.endif
|
.endif
|
||||||
SRCS += siphash.c
|
SRCS += siphash.c
|
||||||
SRCS += gmac.c gfmult.c
|
SRCS += gmac.c gfmult.c
|
||||||
|
Loading…
Reference in New Issue
Block a user