From a13589bc475a948f69bb241ab314f3dcdf77283c Mon Sep 17 00:00:00 2001 From: John-Mark Gurney Date: Tue, 7 Jul 2015 20:31:09 +0000 Subject: [PATCH] unroll the loop slightly... This improves performance enough to justify, especially for CBC performance where we can't pipeline.. I don't happen to have my measurements handy though... Sponsored by: Netflix, Inc. --- sys/crypto/aesni/aesencdec.h | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/sys/crypto/aesni/aesencdec.h b/sys/crypto/aesni/aesencdec.h index 76e640356a80..80951a48aad4 100644 --- a/sys/crypto/aesni/aesencdec.h +++ b/sys/crypto/aesni/aesencdec.h @@ -1,5 +1,6 @@ /*- * Copyright 2013 John-Mark Gurney + * Copyright 2015 Netflix, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,6 +28,9 @@ * */ +#ifndef _AESENCDEC_H_ +#define _AESENCDEC_H_ + #include #include @@ -105,6 +109,7 @@ aesni_dec8(int rounds, const __m128i *keysched, __m128i a, out[7] = _mm_aesdeclast_si128(h, keysched[i + 1]); } +/* rounds is passed in as rounds - 1 */ static inline __m128i aesni_enc(int rounds, const __m128i *keysched, const __m128i from) { @@ -112,11 +117,13 @@ aesni_enc(int rounds, const __m128i *keysched, const __m128i from) int i; tmp = from ^ keysched[0]; - - for (i = 0; i < rounds; i++) + for (i = 1; i < rounds; i += 2) { + tmp = _mm_aesenc_si128(tmp, keysched[i]); tmp = _mm_aesenc_si128(tmp, keysched[i + 1]); + } - return _mm_aesenclast_si128(tmp, keysched[i + 1]); + tmp = _mm_aesenc_si128(tmp, keysched[rounds]); + return _mm_aesenclast_si128(tmp, keysched[rounds + 1]); } static inline __m128i @@ -127,8 +134,13 @@ aesni_dec(int rounds, const __m128i *keysched, const __m128i from) tmp = from ^ keysched[0]; - for (i = 0; i < rounds; i++) + for (i = 1; i < rounds; i += 2) { + tmp = _mm_aesdec_si128(tmp, keysched[i]); tmp = _mm_aesdec_si128(tmp, keysched[i + 1]); + } - return _mm_aesdeclast_si128(tmp, keysched[i + 1]); + tmp = _mm_aesdec_si128(tmp, keysched[rounds]); + return _mm_aesdeclast_si128(tmp, keysched[rounds + 1]); } + +#endif /* _AESENCDEC_H_ */