armv8crypto: Use cursors to access crypto buffer data
Currently armv8crypto copies the scheme used in aesni(9), where payload data and output buffers are allocated on the fly if the crypto buffer is not virtually contiguous. This scheme is simple but incurs a lot of overhead: for an encryption request with a separate output buffer we have to - allocate a temporary buffer to hold the payload - copy input data into the buffer - copy the encrypted payload to the output buffer - zero the temporary buffer before freeing it We have a handy crypto buffer cursor abstraction now, so reimplement the armv8crypto routines using that instead of temporary buffers. This introduces some extra complexity, but gallatin@ reports a 10% throughput improvement with a KTLS workload without additional CPU usage. The driver still allocates an AAD buffer for AES-GCM if necessary. Reviewed by: jhb Tested by: gallatin Sponsored by: Ampere Computing LLC Submitted by: Klara Inc. MFC after: 1 month Differential Revision: https://reviews.freebsd.org/D28950
This commit is contained in:
parent
0b3235ef74
commit
26b08c5d21
@ -378,30 +378,23 @@ static int
|
|||||||
armv8_crypto_cipher_process(struct armv8_crypto_session *ses,
|
armv8_crypto_cipher_process(struct armv8_crypto_session *ses,
|
||||||
struct cryptop *crp)
|
struct cryptop *crp)
|
||||||
{
|
{
|
||||||
|
struct crypto_buffer_cursor fromc, toc;
|
||||||
const struct crypto_session_params *csp;
|
const struct crypto_session_params *csp;
|
||||||
struct fpu_kern_ctx *ctx;
|
struct fpu_kern_ctx *ctx;
|
||||||
uint8_t *buf, *authbuf, *outbuf;
|
uint8_t *authbuf;
|
||||||
uint8_t iv[AES_BLOCK_LEN], tag[GMAC_DIGEST_LEN];
|
uint8_t iv[AES_BLOCK_LEN], tag[GMAC_DIGEST_LEN];
|
||||||
int allocated, authallocated, outallocated, i;
|
int authallocated, i;
|
||||||
int encflag;
|
int encflag;
|
||||||
int kt;
|
int kt;
|
||||||
int error;
|
int error;
|
||||||
bool outcopy;
|
|
||||||
|
|
||||||
csp = crypto_get_params(crp->crp_session);
|
csp = crypto_get_params(crp->crp_session);
|
||||||
encflag = CRYPTO_OP_IS_ENCRYPT(crp->crp_op);
|
encflag = CRYPTO_OP_IS_ENCRYPT(crp->crp_op);
|
||||||
|
|
||||||
allocated = 0;
|
|
||||||
outallocated = 0;
|
|
||||||
authallocated = 0;
|
authallocated = 0;
|
||||||
authbuf = NULL;
|
authbuf = NULL;
|
||||||
kt = 1;
|
kt = 1;
|
||||||
|
|
||||||
buf = armv8_crypto_cipher_alloc(crp, crp->crp_payload_start,
|
|
||||||
crp->crp_payload_length, &allocated);
|
|
||||||
if (buf == NULL)
|
|
||||||
return (ENOMEM);
|
|
||||||
|
|
||||||
if (csp->csp_cipher_alg == CRYPTO_AES_NIST_GCM_16) {
|
if (csp->csp_cipher_alg == CRYPTO_AES_NIST_GCM_16) {
|
||||||
if (crp->crp_aad != NULL)
|
if (crp->crp_aad != NULL)
|
||||||
authbuf = crp->crp_aad;
|
authbuf = crp->crp_aad;
|
||||||
@ -413,28 +406,13 @@ armv8_crypto_cipher_process(struct armv8_crypto_session *ses,
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
crypto_cursor_init(&fromc, &crp->crp_buf);
|
||||||
|
crypto_cursor_advance(&fromc, crp->crp_payload_start);
|
||||||
if (CRYPTO_HAS_OUTPUT_BUFFER(crp)) {
|
if (CRYPTO_HAS_OUTPUT_BUFFER(crp)) {
|
||||||
outbuf = crypto_buffer_contiguous_subsegment(&crp->crp_obuf,
|
crypto_cursor_init(&toc, &crp->crp_obuf);
|
||||||
crp->crp_payload_output_start, crp->crp_payload_length);
|
crypto_cursor_advance(&toc, crp->crp_payload_output_start);
|
||||||
if (outbuf == NULL) {
|
|
||||||
outcopy = true;
|
|
||||||
if (allocated)
|
|
||||||
outbuf = buf;
|
|
||||||
else {
|
|
||||||
outbuf = malloc(crp->crp_payload_length,
|
|
||||||
M_ARMV8_CRYPTO, M_NOWAIT);
|
|
||||||
if (outbuf == NULL) {
|
|
||||||
error = ENOMEM;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
outallocated = true;
|
|
||||||
}
|
|
||||||
} else
|
|
||||||
outcopy = false;
|
|
||||||
} else {
|
} else {
|
||||||
outbuf = buf;
|
crypto_cursor_copy(&fromc, &toc);
|
||||||
outcopy = allocated;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
kt = is_fpu_kern_thread(0);
|
kt = is_fpu_kern_thread(0);
|
||||||
@ -451,7 +429,6 @@ armv8_crypto_cipher_process(struct armv8_crypto_session *ses,
|
|||||||
|
|
||||||
crypto_read_iv(crp, iv);
|
crypto_read_iv(crp, iv);
|
||||||
|
|
||||||
/* Do work */
|
|
||||||
switch (csp->csp_cipher_alg) {
|
switch (csp->csp_cipher_alg) {
|
||||||
case CRYPTO_AES_CBC:
|
case CRYPTO_AES_CBC:
|
||||||
if ((crp->crp_payload_length % AES_BLOCK_LEN) != 0) {
|
if ((crp->crp_payload_length % AES_BLOCK_LEN) != 0) {
|
||||||
@ -460,51 +437,41 @@ armv8_crypto_cipher_process(struct armv8_crypto_session *ses,
|
|||||||
}
|
}
|
||||||
if (encflag)
|
if (encflag)
|
||||||
armv8_aes_encrypt_cbc(&ses->enc_schedule,
|
armv8_aes_encrypt_cbc(&ses->enc_schedule,
|
||||||
crp->crp_payload_length, buf, buf, iv);
|
crp->crp_payload_length, &fromc, &toc, iv);
|
||||||
else
|
else
|
||||||
armv8_aes_decrypt_cbc(&ses->dec_schedule,
|
armv8_aes_decrypt_cbc(&ses->dec_schedule,
|
||||||
crp->crp_payload_length, buf, iv);
|
crp->crp_payload_length, &fromc, &toc, iv);
|
||||||
break;
|
break;
|
||||||
case CRYPTO_AES_XTS:
|
case CRYPTO_AES_XTS:
|
||||||
if (encflag)
|
if (encflag)
|
||||||
armv8_aes_encrypt_xts(&ses->enc_schedule,
|
armv8_aes_encrypt_xts(&ses->enc_schedule,
|
||||||
&ses->xts_schedule.aes_key, crp->crp_payload_length, buf,
|
&ses->xts_schedule.aes_key, crp->crp_payload_length,
|
||||||
buf, iv);
|
&fromc, &toc, iv);
|
||||||
else
|
else
|
||||||
armv8_aes_decrypt_xts(&ses->dec_schedule,
|
armv8_aes_decrypt_xts(&ses->dec_schedule,
|
||||||
&ses->xts_schedule.aes_key, crp->crp_payload_length, buf,
|
&ses->xts_schedule.aes_key, crp->crp_payload_length,
|
||||||
buf, iv);
|
&fromc, &toc, iv);
|
||||||
break;
|
break;
|
||||||
case CRYPTO_AES_NIST_GCM_16:
|
case CRYPTO_AES_NIST_GCM_16:
|
||||||
if (encflag) {
|
if (encflag) {
|
||||||
memset(tag, 0, sizeof(tag));
|
memset(tag, 0, sizeof(tag));
|
||||||
armv8_aes_encrypt_gcm(&ses->enc_schedule,
|
armv8_aes_encrypt_gcm(&ses->enc_schedule,
|
||||||
crp->crp_payload_length,
|
crp->crp_payload_length, &fromc, &toc,
|
||||||
buf, outbuf,
|
crp->crp_aad_length, authbuf, tag, iv, ses->Htable);
|
||||||
crp->crp_aad_length, authbuf,
|
|
||||||
tag, iv, ses->Htable);
|
|
||||||
crypto_copyback(crp, crp->crp_digest_start, sizeof(tag),
|
crypto_copyback(crp, crp->crp_digest_start, sizeof(tag),
|
||||||
tag);
|
tag);
|
||||||
} else {
|
} else {
|
||||||
crypto_copydata(crp, crp->crp_digest_start, sizeof(tag),
|
crypto_copydata(crp, crp->crp_digest_start, sizeof(tag),
|
||||||
tag);
|
tag);
|
||||||
if (armv8_aes_decrypt_gcm(&ses->enc_schedule,
|
error = armv8_aes_decrypt_gcm(&ses->enc_schedule,
|
||||||
crp->crp_payload_length,
|
crp->crp_payload_length, &fromc, &toc,
|
||||||
buf, outbuf,
|
crp->crp_aad_length, authbuf, tag, iv, ses->Htable);
|
||||||
crp->crp_aad_length, authbuf,
|
if (error != 0)
|
||||||
tag, iv, ses->Htable) != 0) {
|
|
||||||
error = EBADMSG;
|
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (outcopy)
|
|
||||||
crypto_copyback(crp, CRYPTO_HAS_OUTPUT_BUFFER(crp) ?
|
|
||||||
crp->crp_payload_output_start : crp->crp_payload_start,
|
|
||||||
crp->crp_payload_length, outbuf);
|
|
||||||
|
|
||||||
error = 0;
|
error = 0;
|
||||||
out:
|
out:
|
||||||
if (!kt) {
|
if (!kt) {
|
||||||
@ -512,12 +479,8 @@ armv8_crypto_cipher_process(struct armv8_crypto_session *ses,
|
|||||||
RELEASE_CTX(i, ctx);
|
RELEASE_CTX(i, ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (allocated)
|
|
||||||
zfree(buf, M_ARMV8_CRYPTO);
|
|
||||||
if (authallocated)
|
if (authallocated)
|
||||||
zfree(authbuf, M_ARMV8_CRYPTO);
|
zfree(authbuf, M_ARMV8_CRYPTO);
|
||||||
if (outallocated)
|
|
||||||
zfree(outbuf, M_ARMV8_CRYPTO);
|
|
||||||
explicit_bzero(iv, sizeof(iv));
|
explicit_bzero(iv, sizeof(iv));
|
||||||
explicit_bzero(tag, sizeof(tag));
|
explicit_bzero(tag, sizeof(tag));
|
||||||
|
|
||||||
|
@ -63,24 +63,29 @@ void gcm_init_v8(__uint128_val_t Htable[16], const uint64_t Xi[2]);
|
|||||||
void gcm_gmult_v8(uint64_t Xi[2], const __uint128_val_t Htable[16]);
|
void gcm_gmult_v8(uint64_t Xi[2], const __uint128_val_t Htable[16]);
|
||||||
void gcm_ghash_v8(uint64_t Xi[2], const __uint128_val_t Htable[16], const uint8_t *inp, size_t len);
|
void gcm_ghash_v8(uint64_t Xi[2], const __uint128_val_t Htable[16], const uint8_t *inp, size_t len);
|
||||||
|
|
||||||
void armv8_aes_encrypt_cbc(const AES_key_t *, size_t, const uint8_t *,
|
void armv8_aes_encrypt_cbc(const AES_key_t *key, size_t len,
|
||||||
uint8_t *, const uint8_t[static AES_BLOCK_LEN]);
|
struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
|
||||||
void armv8_aes_decrypt_cbc(const AES_key_t *, size_t, uint8_t *,
|
const uint8_t iv[static AES_BLOCK_LEN]);
|
||||||
|
void armv8_aes_decrypt_cbc(const AES_key_t *, size_t,
|
||||||
|
struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
|
||||||
const uint8_t[static AES_BLOCK_LEN]);
|
const uint8_t[static AES_BLOCK_LEN]);
|
||||||
void armv8_aes_encrypt_gcm(AES_key_t *, size_t, const uint8_t *,
|
void armv8_aes_encrypt_gcm(AES_key_t *, size_t,
|
||||||
uint8_t *, size_t, const uint8_t*,
|
struct crypto_buffer_cursor *, struct crypto_buffer_cursor *,
|
||||||
|
size_t, const uint8_t *,
|
||||||
uint8_t tag[static GMAC_DIGEST_LEN],
|
uint8_t tag[static GMAC_DIGEST_LEN],
|
||||||
const uint8_t[static AES_BLOCK_LEN],
|
const uint8_t[static AES_BLOCK_LEN],
|
||||||
const __uint128_val_t *);
|
const __uint128_val_t *);
|
||||||
int armv8_aes_decrypt_gcm(AES_key_t *, size_t, const uint8_t *,
|
int armv8_aes_decrypt_gcm(AES_key_t *, size_t,
|
||||||
uint8_t *, size_t, const uint8_t*,
|
struct crypto_buffer_cursor *, struct crypto_buffer_cursor *,
|
||||||
const uint8_t tag[static GMAC_DIGEST_LEN],
|
size_t, const uint8_t *, const uint8_t tag[static GMAC_DIGEST_LEN],
|
||||||
const uint8_t[static AES_BLOCK_LEN],
|
const uint8_t[static AES_BLOCK_LEN],
|
||||||
const __uint128_val_t *);
|
const __uint128_val_t *);
|
||||||
|
|
||||||
void armv8_aes_encrypt_xts(AES_key_t *, const void *, size_t,
|
void armv8_aes_encrypt_xts(AES_key_t *, const void *, size_t,
|
||||||
const uint8_t *, uint8_t *, const uint8_t[AES_BLOCK_LEN]);
|
struct crypto_buffer_cursor *, struct crypto_buffer_cursor *,
|
||||||
|
const uint8_t[AES_BLOCK_LEN]);
|
||||||
void armv8_aes_decrypt_xts(AES_key_t *, const void *, size_t,
|
void armv8_aes_decrypt_xts(AES_key_t *, const void *, size_t,
|
||||||
const uint8_t *, uint8_t *, const uint8_t[AES_BLOCK_LEN]);
|
struct crypto_buffer_cursor *, struct crypto_buffer_cursor *,
|
||||||
|
const uint8_t[AES_BLOCK_LEN]);
|
||||||
|
|
||||||
#endif /* _ARMV8_CRYPTO_H_ */
|
#endif /* _ARMV8_CRYPTO_H_ */
|
||||||
|
@ -101,41 +101,97 @@ armv8_aes_dec(int rounds, const uint8x16_t *keysched, const uint8x16_t from)
|
|||||||
|
|
||||||
void
|
void
|
||||||
armv8_aes_encrypt_cbc(const AES_key_t *key, size_t len,
|
armv8_aes_encrypt_cbc(const AES_key_t *key, size_t len,
|
||||||
const uint8_t *from, uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN])
|
struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
|
||||||
|
const uint8_t iv[static AES_BLOCK_LEN])
|
||||||
{
|
{
|
||||||
uint8x16_t tot, ivreg, tmp;
|
uint8x16_t tot, ivreg, tmp;
|
||||||
size_t i;
|
uint8_t block[AES_BLOCK_LEN], *from, *to;
|
||||||
|
size_t fromseglen, oseglen, seglen, toseglen;
|
||||||
|
|
||||||
|
KASSERT(len % AES_BLOCK_LEN == 0,
|
||||||
|
("%s: length %zu not a multiple of the block size", __func__, len));
|
||||||
|
|
||||||
len /= AES_BLOCK_LEN;
|
|
||||||
ivreg = vld1q_u8(iv);
|
ivreg = vld1q_u8(iv);
|
||||||
for (i = 0; i < len; i++) {
|
for (; len > 0; len -= seglen) {
|
||||||
tmp = vld1q_u8(from);
|
from = crypto_cursor_segment(fromc, &fromseglen);
|
||||||
|
to = crypto_cursor_segment(toc, &toseglen);
|
||||||
|
|
||||||
|
seglen = ulmin(len, ulmin(fromseglen, toseglen));
|
||||||
|
if (seglen < AES_BLOCK_LEN) {
|
||||||
|
crypto_cursor_copydata(fromc, AES_BLOCK_LEN, block);
|
||||||
|
tmp = vld1q_u8(block);
|
||||||
tot = armv8_aes_enc(key->aes_rounds - 1,
|
tot = armv8_aes_enc(key->aes_rounds - 1,
|
||||||
(const void *)key->aes_key, veorq_u8(tmp, ivreg));
|
(const void *)key->aes_key, veorq_u8(tmp, ivreg));
|
||||||
ivreg = tot;
|
ivreg = tot;
|
||||||
|
vst1q_u8(block, tot);
|
||||||
|
crypto_cursor_copyback(toc, AES_BLOCK_LEN, block);
|
||||||
|
seglen = AES_BLOCK_LEN;
|
||||||
|
} else {
|
||||||
|
for (oseglen = seglen; seglen >= AES_BLOCK_LEN;
|
||||||
|
seglen -= AES_BLOCK_LEN) {
|
||||||
|
tmp = vld1q_u8(from);
|
||||||
|
tot = armv8_aes_enc(key->aes_rounds - 1,
|
||||||
|
(const void *)key->aes_key,
|
||||||
|
veorq_u8(tmp, ivreg));
|
||||||
|
ivreg = tot;
|
||||||
vst1q_u8(to, tot);
|
vst1q_u8(to, tot);
|
||||||
from += AES_BLOCK_LEN;
|
from += AES_BLOCK_LEN;
|
||||||
to += AES_BLOCK_LEN;
|
to += AES_BLOCK_LEN;
|
||||||
}
|
}
|
||||||
|
seglen = oseglen - seglen;
|
||||||
|
crypto_cursor_advance(fromc, seglen);
|
||||||
|
crypto_cursor_advance(toc, seglen);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
explicit_bzero(block, sizeof(block));
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
armv8_aes_decrypt_cbc(const AES_key_t *key, size_t len,
|
armv8_aes_decrypt_cbc(const AES_key_t *key, size_t len,
|
||||||
uint8_t *buf, const uint8_t iv[static AES_BLOCK_LEN])
|
struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
|
||||||
|
const uint8_t iv[static AES_BLOCK_LEN])
|
||||||
{
|
{
|
||||||
uint8x16_t ivreg, nextiv, tmp;
|
uint8x16_t ivreg, nextiv, tmp;
|
||||||
size_t i;
|
uint8_t block[AES_BLOCK_LEN], *from, *to;
|
||||||
|
size_t fromseglen, oseglen, seglen, toseglen;
|
||||||
|
|
||||||
|
KASSERT(len % AES_BLOCK_LEN == 0,
|
||||||
|
("%s: length %zu not a multiple of the block size", __func__, len));
|
||||||
|
|
||||||
len /= AES_BLOCK_LEN;
|
|
||||||
ivreg = vld1q_u8(iv);
|
ivreg = vld1q_u8(iv);
|
||||||
for (i = 0; i < len; i++) {
|
for (; len > 0; len -= seglen) {
|
||||||
nextiv = vld1q_u8(buf);
|
from = crypto_cursor_segment(fromc, &fromseglen);
|
||||||
|
to = crypto_cursor_segment(toc, &toseglen);
|
||||||
|
|
||||||
|
seglen = ulmin(len, ulmin(fromseglen, toseglen));
|
||||||
|
if (seglen < AES_BLOCK_LEN) {
|
||||||
|
crypto_cursor_copydata(fromc, AES_BLOCK_LEN, block);
|
||||||
|
nextiv = vld1q_u8(block);
|
||||||
tmp = armv8_aes_dec(key->aes_rounds - 1,
|
tmp = armv8_aes_dec(key->aes_rounds - 1,
|
||||||
(const void *)key->aes_key, nextiv);
|
(const void *)key->aes_key, nextiv);
|
||||||
vst1q_u8(buf, veorq_u8(tmp, ivreg));
|
vst1q_u8(block, veorq_u8(tmp, ivreg));
|
||||||
ivreg = nextiv;
|
ivreg = nextiv;
|
||||||
buf += AES_BLOCK_LEN;
|
crypto_cursor_copyback(toc, AES_BLOCK_LEN, block);
|
||||||
|
seglen = AES_BLOCK_LEN;
|
||||||
|
} else {
|
||||||
|
for (oseglen = seglen; seglen >= AES_BLOCK_LEN;
|
||||||
|
seglen -= AES_BLOCK_LEN) {
|
||||||
|
nextiv = vld1q_u8(from);
|
||||||
|
tmp = armv8_aes_dec(key->aes_rounds - 1,
|
||||||
|
(const void *)key->aes_key, nextiv);
|
||||||
|
vst1q_u8(to, veorq_u8(tmp, ivreg));
|
||||||
|
ivreg = nextiv;
|
||||||
|
from += AES_BLOCK_LEN;
|
||||||
|
to += AES_BLOCK_LEN;
|
||||||
}
|
}
|
||||||
|
crypto_cursor_advance(fromc, oseglen - seglen);
|
||||||
|
crypto_cursor_advance(toc, oseglen - seglen);
|
||||||
|
seglen = oseglen - seglen;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
explicit_bzero(block, sizeof(block));
|
||||||
}
|
}
|
||||||
|
|
||||||
#define AES_XTS_BLOCKSIZE 16
|
#define AES_XTS_BLOCKSIZE 16
|
||||||
@ -180,12 +236,18 @@ armv8_aes_crypt_xts_block(int rounds, const uint8x16_t *key_schedule,
|
|||||||
|
|
||||||
static void
|
static void
|
||||||
armv8_aes_crypt_xts(int rounds, const uint8x16_t *data_schedule,
|
armv8_aes_crypt_xts(int rounds, const uint8x16_t *data_schedule,
|
||||||
const uint8x16_t *tweak_schedule, size_t len, const uint8_t *from,
|
const uint8x16_t *tweak_schedule, size_t len,
|
||||||
uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN], int do_encrypt)
|
struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
|
||||||
|
const uint8_t iv[static AES_BLOCK_LEN], int do_encrypt)
|
||||||
{
|
{
|
||||||
uint8x16_t tweakreg;
|
uint8x16_t tweakreg;
|
||||||
|
uint8_t block[AES_XTS_BLOCKSIZE] __aligned(16);
|
||||||
uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
|
uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
|
||||||
size_t i, cnt;
|
uint8_t *from, *to;
|
||||||
|
size_t fromseglen, oseglen, seglen, toseglen;
|
||||||
|
|
||||||
|
KASSERT(len % AES_XTS_BLOCKSIZE == 0,
|
||||||
|
("%s: length %zu not a multiple of the block size", __func__, len));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Prepare tweak as E_k2(IV). IV is specified as LE representation
|
* Prepare tweak as E_k2(IV). IV is specified as LE representation
|
||||||
@ -201,38 +263,57 @@ armv8_aes_crypt_xts(int rounds, const uint8x16_t *data_schedule,
|
|||||||
tweakreg = vld1q_u8(tweak);
|
tweakreg = vld1q_u8(tweak);
|
||||||
tweakreg = armv8_aes_enc(rounds - 1, tweak_schedule, tweakreg);
|
tweakreg = armv8_aes_enc(rounds - 1, tweak_schedule, tweakreg);
|
||||||
|
|
||||||
cnt = len / AES_XTS_BLOCKSIZE;
|
for (; len > 0; len -= seglen) {
|
||||||
for (i = 0; i < cnt; i++) {
|
from = crypto_cursor_segment(fromc, &fromseglen);
|
||||||
armv8_aes_crypt_xts_block(rounds, data_schedule, &tweakreg,
|
to = crypto_cursor_segment(toc, &toseglen);
|
||||||
from, to, do_encrypt);
|
|
||||||
|
seglen = ulmin(len, ulmin(fromseglen, toseglen));
|
||||||
|
if (seglen < AES_XTS_BLOCKSIZE) {
|
||||||
|
printf("%d seglen %zu\n", __LINE__, seglen);
|
||||||
|
crypto_cursor_copydata(fromc, AES_XTS_BLOCKSIZE, block);
|
||||||
|
armv8_aes_crypt_xts_block(rounds, data_schedule,
|
||||||
|
&tweakreg, block, block, do_encrypt);
|
||||||
|
crypto_cursor_copyback(toc, AES_XTS_BLOCKSIZE, block);
|
||||||
|
seglen = AES_XTS_BLOCKSIZE;
|
||||||
|
} else {
|
||||||
|
printf("%d seglen %zu\n", __LINE__, seglen);
|
||||||
|
for (oseglen = seglen; seglen >= AES_XTS_BLOCKSIZE;
|
||||||
|
seglen -= AES_XTS_BLOCKSIZE) {
|
||||||
|
armv8_aes_crypt_xts_block(rounds, data_schedule,
|
||||||
|
&tweakreg, from, to, do_encrypt);
|
||||||
from += AES_XTS_BLOCKSIZE;
|
from += AES_XTS_BLOCKSIZE;
|
||||||
to += AES_XTS_BLOCKSIZE;
|
to += AES_XTS_BLOCKSIZE;
|
||||||
}
|
}
|
||||||
|
seglen = oseglen - seglen;
|
||||||
|
crypto_cursor_advance(fromc, seglen);
|
||||||
|
crypto_cursor_advance(toc, seglen);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
explicit_bzero(block, sizeof(block));
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
armv8_aes_encrypt_xts(AES_key_t *data_schedule,
|
armv8_aes_encrypt_xts(AES_key_t *data_schedule,
|
||||||
const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
|
const void *tweak_schedule, size_t len, struct crypto_buffer_cursor *fromc,
|
||||||
const uint8_t iv[static AES_BLOCK_LEN])
|
struct crypto_buffer_cursor *toc, const uint8_t iv[static AES_BLOCK_LEN])
|
||||||
{
|
{
|
||||||
|
|
||||||
armv8_aes_crypt_xts(data_schedule->aes_rounds,
|
armv8_aes_crypt_xts(data_schedule->aes_rounds,
|
||||||
(const void *)&data_schedule->aes_key, tweak_schedule, len, from,
|
(const void *)&data_schedule->aes_key, tweak_schedule, len, fromc,
|
||||||
to, iv, 1);
|
toc, iv, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
armv8_aes_decrypt_xts(AES_key_t *data_schedule,
|
armv8_aes_decrypt_xts(AES_key_t *data_schedule,
|
||||||
const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
|
const void *tweak_schedule, size_t len,
|
||||||
|
struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
|
||||||
const uint8_t iv[static AES_BLOCK_LEN])
|
const uint8_t iv[static AES_BLOCK_LEN])
|
||||||
{
|
{
|
||||||
|
|
||||||
armv8_aes_crypt_xts(data_schedule->aes_rounds,
|
armv8_aes_crypt_xts(data_schedule->aes_rounds,
|
||||||
(const void *)&data_schedule->aes_key, tweak_schedule, len, from,
|
(const void *)&data_schedule->aes_key, tweak_schedule, len, fromc,
|
||||||
to,iv, 0);
|
toc, iv, 0);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define AES_INC_COUNTER(counter) \
|
#define AES_INC_COUNTER(counter) \
|
||||||
do { \
|
do { \
|
||||||
for (int pos = AES_BLOCK_LEN - 1; \
|
for (int pos = AES_BLOCK_LEN - 1; \
|
||||||
@ -296,115 +377,161 @@ armv8_aes_gmac_finish(struct armv8_gcm_state *s, size_t len,
|
|||||||
s->Xi.u[1] ^= s->EK0.u[1];
|
s->Xi.u[1] ^= s->EK0.u[1];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
armv8_aes_encrypt_gcm_block(struct armv8_gcm_state *s, AES_key_t *aes_key,
|
||||||
|
const uint64_t *from, uint64_t *to)
|
||||||
|
{
|
||||||
|
aes_v8_encrypt(s->aes_counter, s->EKi.c, aes_key);
|
||||||
|
AES_INC_COUNTER(s->aes_counter);
|
||||||
|
to[0] = from[0] ^ s->EKi.u[0];
|
||||||
|
to[1] = from[1] ^ s->EKi.u[1];
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
armv8_aes_decrypt_gcm_block(struct armv8_gcm_state *s, AES_key_t *aes_key,
|
||||||
|
const uint64_t *from, uint64_t *to)
|
||||||
|
{
|
||||||
|
armv8_aes_encrypt_gcm_block(s, aes_key, from, to);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
armv8_aes_encrypt_gcm(AES_key_t *aes_key, size_t len,
|
armv8_aes_encrypt_gcm(AES_key_t *aes_key, size_t len,
|
||||||
const uint8_t *from, uint8_t *to,
|
struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
|
||||||
size_t authdatalen, const uint8_t *authdata,
|
size_t authdatalen, const uint8_t *authdata,
|
||||||
uint8_t tag[static GMAC_DIGEST_LEN],
|
uint8_t tag[static GMAC_DIGEST_LEN],
|
||||||
const uint8_t iv[static AES_GCM_IV_LEN],
|
const uint8_t iv[static AES_GCM_IV_LEN],
|
||||||
const __uint128_val_t *Htable)
|
const __uint128_val_t *Htable)
|
||||||
{
|
{
|
||||||
struct armv8_gcm_state s;
|
struct armv8_gcm_state s;
|
||||||
const uint64_t *from64;
|
uint8_t block[AES_BLOCK_LEN] __aligned(AES_BLOCK_LEN);
|
||||||
uint64_t *to64;
|
uint64_t *from64, *to64;
|
||||||
uint8_t block[AES_BLOCK_LEN];
|
size_t fromseglen, i, olen, oseglen, seglen, toseglen;
|
||||||
size_t i, trailer;
|
|
||||||
|
|
||||||
armv8_aes_gmac_setup(&s, aes_key, authdata, authdatalen, iv, Htable);
|
armv8_aes_gmac_setup(&s, aes_key, authdata, authdatalen, iv, Htable);
|
||||||
|
|
||||||
from64 = (const uint64_t *)from;
|
for (olen = len; len > 0; len -= seglen) {
|
||||||
to64 = (uint64_t *)to;
|
from64 = crypto_cursor_segment(fromc, &fromseglen);
|
||||||
trailer = len % AES_BLOCK_LEN;
|
to64 = crypto_cursor_segment(toc, &toseglen);
|
||||||
|
|
||||||
for (i = 0; i < (len - trailer); i += AES_BLOCK_LEN) {
|
seglen = ulmin(len, ulmin(fromseglen, toseglen));
|
||||||
aes_v8_encrypt(s.aes_counter, s.EKi.c, aes_key);
|
if (seglen < AES_BLOCK_LEN) {
|
||||||
AES_INC_COUNTER(s.aes_counter);
|
seglen = ulmin(len, AES_BLOCK_LEN);
|
||||||
to64[0] = from64[0] ^ s.EKi.u[0];
|
|
||||||
to64[1] = from64[1] ^ s.EKi.u[1];
|
|
||||||
gcm_ghash_v8(s.Xi.u, Htable, (uint8_t*)to64, AES_BLOCK_LEN);
|
|
||||||
|
|
||||||
to64 += 2;
|
|
||||||
from64 += 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
to += (len - trailer);
|
|
||||||
from += (len - trailer);
|
|
||||||
|
|
||||||
if (trailer) {
|
|
||||||
aes_v8_encrypt(s.aes_counter, s.EKi.c, aes_key);
|
|
||||||
AES_INC_COUNTER(s.aes_counter);
|
|
||||||
memset(block, 0, sizeof(block));
|
memset(block, 0, sizeof(block));
|
||||||
for (i = 0; i < trailer; i++) {
|
crypto_cursor_copydata(fromc, (int)seglen, block);
|
||||||
block[i] = to[i] = from[i] ^ s.EKi.c[i];
|
|
||||||
|
if (seglen == AES_BLOCK_LEN) {
|
||||||
|
armv8_aes_encrypt_gcm_block(&s, aes_key,
|
||||||
|
(uint64_t *)block, (uint64_t *)block);
|
||||||
|
} else {
|
||||||
|
aes_v8_encrypt(s.aes_counter, s.EKi.c, aes_key);
|
||||||
|
AES_INC_COUNTER(s.aes_counter);
|
||||||
|
for (i = 0; i < seglen; i++)
|
||||||
|
block[i] ^= s.EKi.c[i];
|
||||||
|
}
|
||||||
|
gcm_ghash_v8(s.Xi.u, Htable, block, seglen);
|
||||||
|
|
||||||
|
crypto_cursor_copyback(toc, (int)seglen, block);
|
||||||
|
} else {
|
||||||
|
for (oseglen = seglen; seglen >= AES_BLOCK_LEN;
|
||||||
|
seglen -= AES_BLOCK_LEN) {
|
||||||
|
armv8_aes_encrypt_gcm_block(&s, aes_key, from64,
|
||||||
|
to64);
|
||||||
|
gcm_ghash_v8(s.Xi.u, Htable, (uint8_t *)to64,
|
||||||
|
AES_BLOCK_LEN);
|
||||||
|
|
||||||
|
from64 += 2;
|
||||||
|
to64 += 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
gcm_ghash_v8(s.Xi.u, Htable, block, AES_BLOCK_LEN);
|
seglen = oseglen - seglen;
|
||||||
|
crypto_cursor_advance(fromc, seglen);
|
||||||
|
crypto_cursor_advance(toc, seglen);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
armv8_aes_gmac_finish(&s, len, authdatalen, Htable);
|
armv8_aes_gmac_finish(&s, olen, authdatalen, Htable);
|
||||||
memcpy(tag, s.Xi.c, GMAC_DIGEST_LEN);
|
memcpy(tag, s.Xi.c, GMAC_DIGEST_LEN);
|
||||||
|
|
||||||
|
explicit_bzero(block, sizeof(block));
|
||||||
explicit_bzero(&s, sizeof(s));
|
explicit_bzero(&s, sizeof(s));
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
armv8_aes_decrypt_gcm(AES_key_t *aes_key, size_t len,
|
armv8_aes_decrypt_gcm(AES_key_t *aes_key, size_t len,
|
||||||
const uint8_t *from, uint8_t *to,
|
struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
|
||||||
size_t authdatalen, const uint8_t *authdata,
|
size_t authdatalen, const uint8_t *authdata,
|
||||||
const uint8_t tag[static GMAC_DIGEST_LEN],
|
const uint8_t tag[static GMAC_DIGEST_LEN],
|
||||||
const uint8_t iv[static AES_GCM_IV_LEN],
|
const uint8_t iv[static AES_GCM_IV_LEN],
|
||||||
const __uint128_val_t *Htable)
|
const __uint128_val_t *Htable)
|
||||||
{
|
{
|
||||||
struct armv8_gcm_state s;
|
struct armv8_gcm_state s;
|
||||||
const uint64_t *from64;
|
struct crypto_buffer_cursor fromcc;
|
||||||
uint64_t *to64;
|
uint8_t block[AES_BLOCK_LEN] __aligned(AES_BLOCK_LEN), *from;
|
||||||
uint8_t block[AES_BLOCK_LEN];
|
uint64_t *block64, *from64, *to64;
|
||||||
size_t i, trailer;
|
size_t fromseglen, olen, oseglen, seglen, toseglen;
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
error = 0;
|
|
||||||
|
|
||||||
armv8_aes_gmac_setup(&s, aes_key, authdata, authdatalen, iv, Htable);
|
armv8_aes_gmac_setup(&s, aes_key, authdata, authdatalen, iv, Htable);
|
||||||
|
|
||||||
trailer = len % AES_BLOCK_LEN;
|
crypto_cursor_copy(fromc, &fromcc);
|
||||||
if (len - trailer > 0)
|
for (olen = len; len > 0; len -= seglen) {
|
||||||
gcm_ghash_v8(s.Xi.u, Htable, from, len - trailer);
|
from = crypto_cursor_segment(&fromcc, &fromseglen);
|
||||||
if (trailer > 0) {
|
seglen = ulmin(len, fromseglen);
|
||||||
|
seglen -= seglen % AES_BLOCK_LEN;
|
||||||
|
if (seglen > 0) {
|
||||||
|
gcm_ghash_v8(s.Xi.u, Htable, from, seglen);
|
||||||
|
crypto_cursor_advance(&fromcc, seglen);
|
||||||
|
} else {
|
||||||
memset(block, 0, sizeof(block));
|
memset(block, 0, sizeof(block));
|
||||||
memcpy(block, from + len - trailer, trailer);
|
seglen = ulmin(len, AES_BLOCK_LEN);
|
||||||
gcm_ghash_v8(s.Xi.u, Htable, block, AES_BLOCK_LEN);
|
crypto_cursor_copydata(&fromcc, seglen, block);
|
||||||
|
gcm_ghash_v8(s.Xi.u, Htable, block, seglen);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
armv8_aes_gmac_finish(&s, len, authdatalen, Htable);
|
armv8_aes_gmac_finish(&s, olen, authdatalen, Htable);
|
||||||
|
|
||||||
if (timingsafe_bcmp(tag, s.Xi.c, GMAC_DIGEST_LEN) != 0) {
|
if (timingsafe_bcmp(tag, s.Xi.c, GMAC_DIGEST_LEN) != 0) {
|
||||||
error = EBADMSG;
|
error = EBADMSG;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
from64 = (const uint64_t *)from;
|
block64 = (uint64_t *)block;
|
||||||
to64 = (uint64_t *)to;
|
for (len = olen; len > 0; len -= seglen) {
|
||||||
|
from64 = crypto_cursor_segment(fromc, &fromseglen);
|
||||||
|
to64 = crypto_cursor_segment(toc, &toseglen);
|
||||||
|
|
||||||
|
seglen = ulmin(len, ulmin(fromseglen, toseglen));
|
||||||
|
if (seglen < AES_BLOCK_LEN) {
|
||||||
|
seglen = ulmin(len, AES_BLOCK_LEN);
|
||||||
|
|
||||||
|
memset(block, 0, sizeof(block));
|
||||||
|
crypto_cursor_copydata(fromc, seglen, block);
|
||||||
|
|
||||||
|
armv8_aes_decrypt_gcm_block(&s, aes_key, block64,
|
||||||
|
block64);
|
||||||
|
|
||||||
|
crypto_cursor_copyback(toc, (int)seglen, block);
|
||||||
|
} else {
|
||||||
|
for (oseglen = seglen; seglen >= AES_BLOCK_LEN;
|
||||||
|
seglen -= AES_BLOCK_LEN) {
|
||||||
|
armv8_aes_decrypt_gcm_block(&s, aes_key, from64,
|
||||||
|
to64);
|
||||||
|
|
||||||
for (i = 0; i < (len - trailer); i += AES_BLOCK_LEN) {
|
|
||||||
aes_v8_encrypt(s.aes_counter, s.EKi.c, aes_key);
|
|
||||||
AES_INC_COUNTER(s.aes_counter);
|
|
||||||
to64[0] = from64[0] ^ s.EKi.u[0];
|
|
||||||
to64[1] = from64[1] ^ s.EKi.u[1];
|
|
||||||
to64 += 2;
|
|
||||||
from64 += 2;
|
from64 += 2;
|
||||||
|
to64 += 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
to += (len - trailer);
|
seglen = oseglen - seglen;
|
||||||
from += (len - trailer);
|
crypto_cursor_advance(fromc, seglen);
|
||||||
|
crypto_cursor_advance(toc, seglen);
|
||||||
if (trailer) {
|
}
|
||||||
aes_v8_encrypt(s.aes_counter, s.EKi.c, aes_key);
|
|
||||||
AES_INC_COUNTER(s.aes_counter);
|
|
||||||
for (i = 0; i < trailer; i++)
|
|
||||||
to[i] = from[i] ^ s.EKi.c[i];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
error = 0;
|
||||||
out:
|
out:
|
||||||
|
explicit_bzero(block, sizeof(block));
|
||||||
explicit_bzero(&s, sizeof(s));
|
explicit_bzero(&s, sizeof(s));
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user