Unroll the loop slightly. This improves performance enough to
justify the change, especially for CBC performance, where we can't pipeline. I don't happen to have my measurements handy, though. Sponsored by: Netflix, Inc.
This commit is contained in:
parent
754f368cda
commit
a13589bc47
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=285254
@ -1,5 +1,6 @@
|
|||||||
/*-
|
/*-
|
||||||
* Copyright 2013 John-Mark Gurney <jmg@FreeBSD.org>
|
* Copyright 2013 John-Mark Gurney <jmg@FreeBSD.org>
|
||||||
|
* Copyright 2015 Netflix, Inc.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
@ -27,6 +28,9 @@
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#ifndef _AESENCDEC_H_
|
||||||
|
#define _AESENCDEC_H_
|
||||||
|
|
||||||
#include <crypto/aesni/aesni_os.h>
|
#include <crypto/aesni/aesni_os.h>
|
||||||
|
|
||||||
#include <wmmintrin.h>
|
#include <wmmintrin.h>
|
||||||
@ -105,6 +109,7 @@ aesni_dec8(int rounds, const __m128i *keysched, __m128i a,
|
|||||||
out[7] = _mm_aesdeclast_si128(h, keysched[i + 1]);
|
out[7] = _mm_aesdeclast_si128(h, keysched[i + 1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* rounds is passed in as rounds - 1 */
|
||||||
static inline __m128i
|
static inline __m128i
|
||||||
aesni_enc(int rounds, const __m128i *keysched, const __m128i from)
|
aesni_enc(int rounds, const __m128i *keysched, const __m128i from)
|
||||||
{
|
{
|
||||||
@ -112,11 +117,13 @@ aesni_enc(int rounds, const __m128i *keysched, const __m128i from)
|
|||||||
int i;
|
int i;
|
||||||
|
|
||||||
tmp = from ^ keysched[0];
|
tmp = from ^ keysched[0];
|
||||||
|
for (i = 1; i < rounds; i += 2) {
|
||||||
for (i = 0; i < rounds; i++)
|
tmp = _mm_aesenc_si128(tmp, keysched[i]);
|
||||||
tmp = _mm_aesenc_si128(tmp, keysched[i + 1]);
|
tmp = _mm_aesenc_si128(tmp, keysched[i + 1]);
|
||||||
|
}
|
||||||
|
|
||||||
return _mm_aesenclast_si128(tmp, keysched[i + 1]);
|
tmp = _mm_aesenc_si128(tmp, keysched[rounds]);
|
||||||
|
return _mm_aesenclast_si128(tmp, keysched[rounds + 1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __m128i
|
static inline __m128i
|
||||||
@ -127,8 +134,13 @@ aesni_dec(int rounds, const __m128i *keysched, const __m128i from)
|
|||||||
|
|
||||||
tmp = from ^ keysched[0];
|
tmp = from ^ keysched[0];
|
||||||
|
|
||||||
for (i = 0; i < rounds; i++)
|
for (i = 1; i < rounds; i += 2) {
|
||||||
|
tmp = _mm_aesdec_si128(tmp, keysched[i]);
|
||||||
tmp = _mm_aesdec_si128(tmp, keysched[i + 1]);
|
tmp = _mm_aesdec_si128(tmp, keysched[i + 1]);
|
||||||
|
}
|
||||||
|
|
||||||
return _mm_aesdeclast_si128(tmp, keysched[i + 1]);
|
tmp = _mm_aesdec_si128(tmp, keysched[rounds]);
|
||||||
|
return _mm_aesdeclast_si128(tmp, keysched[rounds + 1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif /* _AESENCDEC_H_ */
|
||||||
|
Loading…
Reference in New Issue
Block a user