SHA256 QAT acceleration
This patch enables acceleration of SHA256 checksums using Intel Quick Assist Technology (QAT). It also fixes up and refactors some of the existing QAT encryption code to make the behavior consistent.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Chengfeix Zhu <chengfeix.zhu@intel.com>
Signed-off-by: Weigang Li <weigang.li@intel.com>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #7295
This commit is contained in:
parent
8a2a9db8df
commit
3874220932
@ -38,10 +38,12 @@ qat_stats_t qat_stats = {
|
||||
{ "decrypt_total_in_bytes", KSTAT_DATA_UINT64 },
|
||||
{ "decrypt_total_out_bytes", KSTAT_DATA_UINT64 },
|
||||
{ "crypt_fails", KSTAT_DATA_UINT64 },
|
||||
{ "cksum_requests", KSTAT_DATA_UINT64 },
|
||||
{ "cksum_total_in_bytes", KSTAT_DATA_UINT64 },
|
||||
{ "cksum_fails", KSTAT_DATA_UINT64 },
|
||||
};
|
||||
|
||||
static kstat_t *qat_ksp = NULL;
|
||||
int zfs_qat_disable = 0;
|
||||
|
||||
CpaStatus
|
||||
qat_mem_alloc_contig(void **pp_mem_addr, Cpa32U size_bytes)
|
||||
|
@ -122,6 +122,22 @@ typedef struct qat_stats {
|
||||
* so the functionality of ZFS is not impacted.
|
||||
*/
|
||||
kstat_named_t crypt_fails;
|
||||
|
||||
/*
|
||||
* Number of jobs submitted to qat checksum engine.
|
||||
*/
|
||||
kstat_named_t cksum_requests;
|
||||
/*
|
||||
* Total bytes sent to qat checksum engine.
|
||||
*/
|
||||
kstat_named_t cksum_total_in_bytes;
|
||||
/*
|
||||
* Number of fails in the qat checksum engine.
|
||||
* Note: when qat fail happens, it doesn't mean a critical hardware
|
||||
* issue. The checksum job will be transferred to the software
|
||||
* implementation, so the functionality of ZFS is not impacted.
|
||||
*/
|
||||
kstat_named_t cksum_fails;
|
||||
} qat_stats_t;
|
||||
|
||||
#define QAT_STAT_INCR(stat, val) \
|
||||
@ -130,7 +146,6 @@ typedef struct qat_stats {
|
||||
QAT_STAT_INCR(stat, 1)
|
||||
|
||||
extern qat_stats_t qat_stats;
|
||||
extern int zfs_qat_disable;
|
||||
|
||||
/* inlined for performance */
|
||||
static inline struct page *
|
||||
@ -158,19 +173,24 @@ extern void qat_fini(void);
|
||||
|
||||
extern boolean_t qat_dc_use_accel(size_t s_len);
|
||||
extern boolean_t qat_crypt_use_accel(size_t s_len);
|
||||
extern boolean_t qat_checksum_use_accel(size_t s_len);
|
||||
extern int qat_compress(qat_compress_dir_t dir, char *src, int src_len,
|
||||
char *dst, int dst_len, size_t *c_len);
|
||||
extern int qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf,
|
||||
uint8_t *aad_buf, uint32_t aad_len, uint8_t *iv_buf, uint8_t *digest_buf,
|
||||
crypto_key_t *key, uint64_t crypt, uint32_t enc_len);
|
||||
extern int qat_checksum(uint64_t cksum, uint8_t *buf, uint64_t size,
|
||||
zio_cksum_t *zcp);
|
||||
#else
|
||||
#define CPA_STATUS_SUCCESS 0
|
||||
#define qat_init()
|
||||
#define qat_fini()
|
||||
#define qat_dc_use_accel(s_len) 0
|
||||
#define qat_crypt_use_accel(s_len) 0
|
||||
#define qat_checksum_use_accel(s_len) 0
|
||||
#define qat_compress(dir, s, sl, d, dl, cl) 0
|
||||
#define qat_crypt(dir, s, d, a, al, i, db, k, c, el) 0
|
||||
#define qat_checksum(c, buf, s, z) 0
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_QAT_H */
|
||||
|
@ -47,11 +47,12 @@ static CpaBufferList **buffer_array[QAT_DC_MAX_INSTANCES];
|
||||
static Cpa16U num_inst = 0;
|
||||
static Cpa32U inst_num = 0;
|
||||
static boolean_t qat_dc_init_done = B_FALSE;
|
||||
int zfs_qat_compress_disable = 0;
|
||||
|
||||
boolean_t
|
||||
qat_dc_use_accel(size_t s_len)
|
||||
{
|
||||
return (!zfs_qat_disable &&
|
||||
return (!zfs_qat_compress_disable &&
|
||||
qat_dc_init_done &&
|
||||
s_len >= QAT_MIN_BUF_SIZE &&
|
||||
s_len <= QAT_MAX_BUF_SIZE);
|
||||
@ -471,4 +472,7 @@ qat_compress(qat_compress_dir_t dir, char *src, int src_len,
|
||||
return (ret);
|
||||
}
|
||||
|
||||
module_param(zfs_qat_compress_disable, int, 0644);
|
||||
MODULE_PARM_DESC(zfs_qat_compress_disable, "Disable QAT compression");
|
||||
|
||||
#endif
|
||||
|
@ -19,6 +19,13 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file represents the QAT implementation of checksums and encryption.
|
||||
* Internally, QAT shares the same cryptographic instances for both of these
|
||||
* operations, so the code has been combined here. QAT data compression uses
|
||||
* compression instances, so that code is separated into qat_compress.c.
|
||||
*/
|
||||
|
||||
#if defined(_KERNEL) && defined(HAVE_QAT)
|
||||
#include <linux/slab.h>
|
||||
#include <linux/vmalloc.h>
|
||||
@ -27,6 +34,7 @@
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zio_crypt.h>
|
||||
#include "lac/cpa_cy_im.h"
|
||||
#include "lac/cpa_cy_common.h"
|
||||
#include "qat.h"
|
||||
|
||||
/*
|
||||
@ -39,10 +47,12 @@
|
||||
|
||||
#define MAX_PAGE_NUM 1024
|
||||
|
||||
static boolean_t qat_crypt_init_done = B_FALSE;
|
||||
static Cpa16U inst_num = 0;
|
||||
static Cpa16U num_inst = 0;
|
||||
static CpaInstanceHandle cy_inst_handles[QAT_CRYPT_MAX_INSTANCES];
|
||||
static boolean_t qat_crypt_init_done = B_FALSE;
|
||||
int zfs_qat_encrypt_disable = 0;
|
||||
int zfs_qat_checksum_disable = 0;
|
||||
|
||||
typedef struct cy_callback {
|
||||
CpaBoolean verify_result;
|
||||
@ -65,7 +75,16 @@ symcallback(void *p_callback, CpaStatus status, const CpaCySymOp operation,
|
||||
boolean_t
|
||||
qat_crypt_use_accel(size_t s_len)
|
||||
{
|
||||
return (!zfs_qat_disable &&
|
||||
return (!zfs_qat_encrypt_disable &&
|
||||
qat_crypt_init_done &&
|
||||
s_len >= QAT_MIN_BUF_SIZE &&
|
||||
s_len <= QAT_MAX_BUF_SIZE);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
qat_checksum_use_accel(size_t s_len)
|
||||
{
|
||||
return (!zfs_qat_checksum_disable &&
|
||||
qat_crypt_init_done &&
|
||||
s_len >= QAT_MIN_BUF_SIZE &&
|
||||
s_len <= QAT_MAX_BUF_SIZE);
|
||||
@ -131,7 +150,7 @@ qat_crypt_fini(void)
|
||||
}
|
||||
|
||||
static CpaStatus
|
||||
init_cy_session_ctx(qat_encrypt_dir_t dir, CpaInstanceHandle inst_handle,
|
||||
qat_init_crypt_session_ctx(qat_encrypt_dir_t dir, CpaInstanceHandle inst_handle,
|
||||
CpaCySymSessionCtx **cy_session_ctx, crypto_key_t *key,
|
||||
Cpa64U crypt, Cpa32U aad_len)
|
||||
{
|
||||
@ -192,7 +211,52 @@ init_cy_session_ctx(qat_encrypt_dir_t dir, CpaInstanceHandle inst_handle,
|
||||
}
|
||||
|
||||
static CpaStatus
|
||||
init_cy_buffer_lists(CpaInstanceHandle inst_handle, uint32_t nr_bufs,
|
||||
qat_init_checksum_session_ctx(CpaInstanceHandle inst_handle,
|
||||
CpaCySymSessionCtx **cy_session_ctx, Cpa64U cksum)
|
||||
{
|
||||
CpaStatus status = CPA_STATUS_SUCCESS;
|
||||
Cpa32U ctx_size;
|
||||
Cpa32U hash_algorithm;
|
||||
CpaCySymSessionSetupData sd = { 0 };
|
||||
|
||||
/*
|
||||
* ZFS's SHA512 checksum is actually SHA512/256, which uses
|
||||
* a different IV from standard SHA512. QAT does not support
|
||||
* SHA512/256, so we can only support SHA256.
|
||||
*/
|
||||
if (cksum == ZIO_CHECKSUM_SHA256)
|
||||
hash_algorithm = CPA_CY_SYM_HASH_SHA256;
|
||||
else
|
||||
return (CPA_STATUS_FAIL);
|
||||
|
||||
sd.sessionPriority = CPA_CY_PRIORITY_NORMAL;
|
||||
sd.symOperation = CPA_CY_SYM_OP_HASH;
|
||||
sd.hashSetupData.hashAlgorithm = hash_algorithm;
|
||||
sd.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_PLAIN;
|
||||
sd.hashSetupData.digestResultLenInBytes = sizeof (zio_cksum_t);
|
||||
sd.digestIsAppended = CPA_FALSE;
|
||||
sd.verifyDigest = CPA_FALSE;
|
||||
|
||||
status = cpaCySymSessionCtxGetSize(inst_handle, &sd, &ctx_size);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
return (status);
|
||||
|
||||
status = QAT_PHYS_CONTIG_ALLOC(cy_session_ctx, ctx_size);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
return (status);
|
||||
|
||||
status = cpaCySymInitSession(inst_handle, symcallback, &sd,
|
||||
*cy_session_ctx);
|
||||
if (status != CPA_STATUS_SUCCESS) {
|
||||
QAT_PHYS_CONTIG_FREE(*cy_session_ctx);
|
||||
return (status);
|
||||
}
|
||||
|
||||
return (CPA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
static CpaStatus
|
||||
qat_init_cy_buffer_lists(CpaInstanceHandle inst_handle, uint32_t nr_bufs,
|
||||
CpaBufferList *src, CpaBufferList *dst)
|
||||
{
|
||||
CpaStatus status = CPA_STATUS_SUCCESS;
|
||||
@ -233,7 +297,7 @@ qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf,
|
||||
CpaStatus status = CPA_STATUS_SUCCESS;
|
||||
Cpa16U i;
|
||||
CpaInstanceHandle cy_inst_handle;
|
||||
Cpa16U nr_bufs;
|
||||
Cpa16U nr_bufs = (enc_len + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE;
|
||||
Cpa32U bytes_left = 0;
|
||||
Cpa8S *in = NULL;
|
||||
Cpa8S *out = NULL;
|
||||
@ -249,6 +313,8 @@ qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf,
|
||||
struct page *in_pages[MAX_PAGE_NUM];
|
||||
struct page *out_pages[MAX_PAGE_NUM];
|
||||
Cpa32S page_num = 0;
|
||||
Cpa32U in_page_off = 0;
|
||||
Cpa32U out_page_off = 0;
|
||||
|
||||
if (dir == QAT_ENCRYPT) {
|
||||
QAT_STAT_BUMP(encrypt_requests);
|
||||
@ -261,15 +327,17 @@ qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf,
|
||||
i = atomic_inc_32_nv(&inst_num) % num_inst;
|
||||
cy_inst_handle = cy_inst_handles[i];
|
||||
|
||||
status = init_cy_session_ctx(dir, cy_inst_handle, &cy_session_ctx, key,
|
||||
crypt, aad_len);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
status = qat_init_crypt_session_ctx(dir, cy_inst_handle,
|
||||
&cy_session_ctx, key, crypt, aad_len);
|
||||
if (status != CPA_STATUS_SUCCESS) {
|
||||
/* don't count CCM as a failure since it's not supported */
|
||||
if (zio_crypt_table[crypt].ci_crypt_type == ZC_TYPE_GCM)
|
||||
QAT_STAT_BUMP(crypt_fails);
|
||||
return (status);
|
||||
}
|
||||
|
||||
nr_bufs = enc_len / PAGE_CACHE_SIZE +
|
||||
(enc_len % PAGE_CACHE_SIZE == 0 ? 0 : 1);
|
||||
status = init_cy_buffer_lists(cy_inst_handle, nr_bufs, &src_buffer_list,
|
||||
&dst_buffer_list);
|
||||
status = qat_init_cy_buffer_lists(cy_inst_handle, nr_bufs,
|
||||
&src_buffer_list, &dst_buffer_list);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
@ -288,14 +356,16 @@ qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf,
|
||||
flat_src_buf = flat_src_buf_array;
|
||||
flat_dst_buf = flat_dst_buf_array;
|
||||
while (bytes_left > 0) {
|
||||
in_page_off = ((long)in & ~PAGE_MASK);
|
||||
out_page_off = ((long)out & ~PAGE_MASK);
|
||||
in_pages[page_num] = qat_mem_to_page(in);
|
||||
out_pages[page_num] = qat_mem_to_page(out);
|
||||
flat_src_buf->pData = kmap(in_pages[page_num]);
|
||||
flat_dst_buf->pData = kmap(out_pages[page_num]);
|
||||
flat_src_buf->dataLenInBytes = min((long)PAGE_CACHE_SIZE,
|
||||
(long)bytes_left);
|
||||
flat_dst_buf->dataLenInBytes = min((long)PAGE_CACHE_SIZE,
|
||||
(long)bytes_left);
|
||||
flat_src_buf->pData = kmap(in_pages[page_num]) + in_page_off;
|
||||
flat_dst_buf->pData = kmap(out_pages[page_num]) + out_page_off;
|
||||
flat_src_buf->dataLenInBytes =
|
||||
min((long)PAGE_CACHE_SIZE - in_page_off, (long)bytes_left);
|
||||
flat_dst_buf->dataLenInBytes =
|
||||
min((long)PAGE_CACHE_SIZE - out_page_off, (long)bytes_left);
|
||||
in += flat_src_buf->dataLenInBytes;
|
||||
out += flat_dst_buf->dataLenInBytes;
|
||||
bytes_left -= flat_src_buf->dataLenInBytes;
|
||||
@ -345,12 +415,10 @@ qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf,
|
||||
QAT_STAT_INCR(decrypt_total_out_bytes, enc_len);
|
||||
|
||||
fail:
|
||||
/* don't count CCM as a failure since it's not supported */
|
||||
if (status != CPA_STATUS_SUCCESS &&
|
||||
zio_crypt_table[crypt].ci_crypt_type != ZC_TYPE_CCM)
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
QAT_STAT_BUMP(crypt_fails);
|
||||
|
||||
for (i = 0; i < page_num; i ++) {
|
||||
for (i = 0; i < page_num; i++) {
|
||||
kunmap(in_pages[i]);
|
||||
kunmap(out_pages[i]);
|
||||
}
|
||||
@ -365,7 +433,108 @@ qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf,
|
||||
return (status);
|
||||
}
|
||||
|
||||
module_param(zfs_qat_disable, int, 0644);
|
||||
MODULE_PARM_DESC(zfs_qat_disable, "Disable QAT acceleration");
|
||||
int
|
||||
qat_checksum(uint64_t cksum, uint8_t *buf, uint64_t size, zio_cksum_t *zcp)
|
||||
{
|
||||
CpaStatus status;
|
||||
Cpa16U i;
|
||||
CpaInstanceHandle cy_inst_handle;
|
||||
Cpa16U nr_bufs = (size + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE;
|
||||
Cpa32U bytes_left = 0;
|
||||
Cpa8S *data = NULL;
|
||||
CpaCySymSessionCtx *cy_session_ctx = NULL;
|
||||
cy_callback_t cb;
|
||||
Cpa8U digest_buffer[sizeof (zio_cksum_t)];
|
||||
CpaCySymOpData op_data = { 0 };
|
||||
CpaBufferList src_buffer_list = { 0 };
|
||||
CpaFlatBuffer *flat_src_buf_array = NULL;
|
||||
CpaFlatBuffer *flat_src_buf = NULL;
|
||||
struct page *in_pages[MAX_PAGE_NUM];
|
||||
Cpa32S page_num = 0;
|
||||
Cpa32U page_off = 0;
|
||||
|
||||
QAT_STAT_BUMP(cksum_requests);
|
||||
QAT_STAT_INCR(cksum_total_in_bytes, size);
|
||||
|
||||
i = atomic_inc_32_nv(&inst_num) % num_inst;
|
||||
cy_inst_handle = cy_inst_handles[i];
|
||||
|
||||
status = qat_init_checksum_session_ctx(cy_inst_handle,
|
||||
&cy_session_ctx, cksum);
|
||||
if (status != CPA_STATUS_SUCCESS) {
|
||||
/* don't count unsupported checksums as a failure */
|
||||
if (cksum == ZIO_CHECKSUM_SHA256 ||
|
||||
cksum == ZIO_CHECKSUM_SHA512)
|
||||
QAT_STAT_BUMP(cksum_fails);
|
||||
return (status);
|
||||
}
|
||||
|
||||
status = qat_init_cy_buffer_lists(cy_inst_handle, nr_bufs,
|
||||
&src_buffer_list, &src_buffer_list);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
status = QAT_PHYS_CONTIG_ALLOC(&flat_src_buf_array,
|
||||
nr_bufs * sizeof (CpaFlatBuffer));
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
bytes_left = size;
|
||||
data = buf;
|
||||
flat_src_buf = flat_src_buf_array;
|
||||
while (bytes_left > 0) {
|
||||
page_off = ((long)data & ~PAGE_MASK);
|
||||
in_pages[page_num] = qat_mem_to_page(data);
|
||||
flat_src_buf->pData = kmap(in_pages[page_num]) + page_off;
|
||||
flat_src_buf->dataLenInBytes =
|
||||
min((long)PAGE_CACHE_SIZE - page_off, (long)bytes_left);
|
||||
data += flat_src_buf->dataLenInBytes;
|
||||
bytes_left -= flat_src_buf->dataLenInBytes;
|
||||
flat_src_buf++;
|
||||
page_num++;
|
||||
}
|
||||
src_buffer_list.pBuffers = flat_src_buf_array;
|
||||
|
||||
op_data.sessionCtx = cy_session_ctx;
|
||||
op_data.packetType = CPA_CY_SYM_PACKET_TYPE_FULL;
|
||||
op_data.hashStartSrcOffsetInBytes = 0;
|
||||
op_data.messageLenToHashInBytes = size;
|
||||
op_data.pDigestResult = digest_buffer;
|
||||
|
||||
cb.verify_result = CPA_FALSE;
|
||||
init_completion(&cb.complete);
|
||||
status = cpaCySymPerformOp(cy_inst_handle, &cb, &op_data,
|
||||
&src_buffer_list, &src_buffer_list, NULL);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
if (!wait_for_completion_interruptible_timeout(&cb.complete,
|
||||
QAT_TIMEOUT_MS)) {
|
||||
status = CPA_STATUS_FAIL;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
bcopy(digest_buffer, zcp, sizeof (zio_cksum_t));
|
||||
|
||||
fail:
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
QAT_STAT_BUMP(cksum_fails);
|
||||
|
||||
for (i = 0; i < page_num; i++)
|
||||
kunmap(in_pages[i]);
|
||||
|
||||
cpaCySymRemoveSession(cy_inst_handle, cy_session_ctx);
|
||||
QAT_PHYS_CONTIG_FREE(src_buffer_list.pPrivateMetaData);
|
||||
QAT_PHYS_CONTIG_FREE(cy_session_ctx);
|
||||
QAT_PHYS_CONTIG_FREE(flat_src_buf_array);
|
||||
|
||||
return (status);
|
||||
}
|
||||
|
||||
module_param(zfs_qat_encrypt_disable, int, 0644);
|
||||
MODULE_PARM_DESC(zfs_qat_encrypt_disable, "Disable QAT encryption");
|
||||
|
||||
module_param(zfs_qat_checksum_disable, int, 0644);
|
||||
MODULE_PARM_DESC(zfs_qat_checksum_disable, "Disable QAT checksumming");
|
||||
|
||||
#endif
|
||||
|
@ -30,6 +30,7 @@
|
||||
#include <sys/zio.h>
|
||||
#include <sys/sha2.h>
|
||||
#include <sys/abd.h>
|
||||
#include "qat.h"
|
||||
|
||||
static int
|
||||
sha_incremental(void *buf, size_t size, void *arg)
|
||||
@ -44,13 +45,25 @@ void
|
||||
abd_checksum_SHA256(abd_t *abd, uint64_t size,
|
||||
const void *ctx_template, zio_cksum_t *zcp)
|
||||
{
|
||||
int ret;
|
||||
SHA2_CTX ctx;
|
||||
zio_cksum_t tmp;
|
||||
|
||||
if (qat_checksum_use_accel(size)) {
|
||||
uint8_t *buf = abd_borrow_buf_copy(abd, size);
|
||||
ret = qat_checksum(ZIO_CHECKSUM_SHA256, buf, size, &tmp);
|
||||
abd_return_buf(abd, buf, size);
|
||||
if (ret == CPA_STATUS_SUCCESS)
|
||||
goto bswap;
|
||||
|
||||
/* If the hardware implementation fails fall back to software */
|
||||
}
|
||||
|
||||
SHA2Init(SHA256, &ctx);
|
||||
(void) abd_iterate_func(abd, 0, size, sha_incremental, &ctx);
|
||||
SHA2Final(&tmp, &ctx);
|
||||
|
||||
bswap:
|
||||
/*
|
||||
* A prior implementation of this function had a
|
||||
* private SHA256 implementation always wrote things out in
|
||||
|
Loading…
Reference in New Issue
Block a user