f5c5ebb133
The upstream repository is on github BLAKE2/libb2. Files landed in sys/contrib/libb2 are the unmodified upstream files, except for one difference: secure_zero_memory's contents have been replaced with explicit_bzero() only because the previous implementation broke powerpc link. Preferential use of explicit_bzero() is in progress upstream, so it is anticipated we will be able to drop this diff in the future. sys/crypto/blake2 contains the source files needed to port libb2 to our build system, a wrapped (limited) variant of the algorithm to match the API of our auth_transform softcrypto abstraction, incorporation into the Open Crypto Framework (OCF) cryptosoft(4) driver, as well as an x86 SSE/AVX accelerated OCF driver, blake2(4). Optimized variants of blake2 are compiled for a number of x86 machines (anything from SSE2 to AVX + XOP). On those machines, FPU context will need to be explicitly saved before using blake2(4)-provided algorithms directly. Use via cryptodev / OCF saves FPU state automatically, and use via the auth_transform softcrypto abstraction does not use FPU. The intent of the OCF driver is mostly to enable testing in userspace via /dev/crypto. ATF tests are added with published KAT test vectors to validate correctness. Reviewed by: jhb, markj Obtained from: github BLAKE2/libb2 Differential Revision: https://reviews.freebsd.org/D14662
275 lines
7.3 KiB
C
275 lines
7.3 KiB
C
/*
   BLAKE2 reference source code package - optimized C implementations

   Written in 2012 by Samuel Neves <sneves@dei.uc.pt>

   To the extent possible under law, the author(s) have dedicated all copyright
   and related and neighboring rights to this software to the public domain
   worldwide. This software is distributed without any warranty.

   You should have received a copy of the CC0 Public Domain Dedication along with
   this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
|
|
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
|
|
#if defined(_OPENMP)
|
|
#include <omp.h>
|
|
#endif
|
|
|
|
#include "blake2.h"
|
|
#include "blake2-impl.h"
|
|
|
|
#define PARALLELISM_DEGREE 8
|
|
|
|
/*
 * Initialize one leaf state of the BLAKE2sp tree.
 *
 * Builds a blake2s_param block with fanout PARALLELISM_DEGREE, depth 2,
 * node depth 0, the given node offset, and all-zero salt/personalization,
 * then hands it to blake2s_init_param().  Afterwards the state's output
 * length is overridden with the inner length (BLAKE2S_OUTBYTES), not with
 * the caller's digest length.
 *
 * S       leaf state to initialize
 * outlen  stored as the parameter block's digest_length
 * keylen  stored as the parameter block's key_length
 * offset  stored (48-bit) as the parameter block's node_offset
 *
 * Always returns 0.
 * NOTE(review): the result of blake2s_init_param() is discarded here;
 * confirm that it cannot fail for these parameters.
 */
static int blake2sp_init_leaf( blake2s_state *S, uint8_t outlen, uint8_t keylen, uint64_t offset )
{
  blake2s_param param;

  /* Tree layout shared by every leaf. */
  param.fanout = PARALLELISM_DEGREE;
  param.depth = 2;
  param.leaf_length = 0;
  param.node_depth = 0;
  param.inner_length = BLAKE2S_OUTBYTES;
  store48( param.node_offset, offset );

  /* Caller-supplied lengths. */
  param.digest_length = outlen;
  param.key_length = keylen;

  /* Neither salt nor personalization is used. */
  memset( param.salt, 0, sizeof param.salt );
  memset( param.personal, 0, sizeof param.personal );

  blake2s_init_param( S, &param );

  /* A leaf always emits an inner-length digest. */
  S->outlen = param.inner_length;
  return 0;
}
|
|
|
|
/*
 * Initialize the root state of the BLAKE2sp tree.
 *
 * Builds a blake2s_param block with fanout PARALLELISM_DEGREE, depth 2,
 * node depth 1, node offset 0, and all-zero salt/personalization, then
 * hands it to blake2s_init_param().  Afterwards the state's output length
 * is set to the requested digest length.
 *
 * S       root state to initialize
 * outlen  stored as the parameter block's digest_length
 * keylen  stored as the parameter block's key_length
 *
 * Always returns 0.
 * NOTE(review): the result of blake2s_init_param() is discarded here;
 * confirm that it cannot fail for these parameters.
 */
static int blake2sp_init_root( blake2s_state *S, uint8_t outlen, uint8_t keylen )
{
  blake2s_param param;

  /* Tree layout: the root sits one level above the leaves at offset 0. */
  param.fanout = PARALLELISM_DEGREE;
  param.depth = 2;
  param.leaf_length = 0;
  param.node_depth = 1;
  param.inner_length = BLAKE2S_OUTBYTES;
  store48( param.node_offset, 0ULL );

  /* Caller-supplied lengths. */
  param.digest_length = outlen;
  param.key_length = keylen;

  /* Neither salt nor personalization is used. */
  memset( param.salt, 0, sizeof param.salt );
  memset( param.personal, 0, sizeof param.personal );

  blake2s_init_param( S, &param );

  /* The root emits the digest length the caller asked for. */
  S->outlen = param.digest_length;
  return 0;
}
|
|
|
|
|
|
/*
 * Prepare an unkeyed BLAKE2sp streaming state.
 *
 * S       state to initialize
 * outlen  digest length in bytes; must be 1..BLAKE2S_OUTBYTES
 *
 * Clears the input buffer, initializes the root and all
 * PARALLELISM_DEGREE leaves with a key length of 0, and flags the root
 * and the final leaf as last nodes.
 *
 * Returns 0 on success, -1 if outlen is out of range or a node
 * initializer reports failure.
 */
int blake2sp_init( blake2sp_state *S, size_t outlen )
{
  size_t leaf;

  if( outlen == 0 || outlen > BLAKE2S_OUTBYTES )
    return -1;

  /* Start with an empty, zeroed input buffer. */
  S->buflen = 0;
  memset( S->buf, 0, sizeof S->buf );

  if( blake2sp_init_root( S->R, ( uint8_t ) outlen, 0 ) < 0 )
    return -1;

  /* Leaf number doubles as its node offset. */
  for( leaf = 0; leaf < PARALLELISM_DEGREE; ++leaf )
  {
    if( blake2sp_init_leaf( S->S[leaf], ( uint8_t ) outlen, 0, leaf ) < 0 )
      return -1;
  }

  S->outlen = ( uint8_t ) outlen;
  S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
  S->R->last_node = 1;
  return 0;
}
|
|
|
|
/*
 * Prepare a keyed BLAKE2sp streaming state.
 *
 * S       state to initialize
 * outlen  digest length in bytes; must be 1..BLAKE2S_OUTBYTES
 * key     key bytes; must not be NULL
 * keylen  key length in bytes; must be 1..BLAKE2S_KEYBYTES
 *
 * Initializes the root and all PARALLELISM_DEGREE leaves with the given
 * key length, flags the root and the final leaf as last nodes, and then
 * feeds every leaf one BLAKE2S_BLOCKBYTES block holding the key followed
 * by zero padding.  The temporary key block is wiped before returning.
 *
 * Returns 0 on success, -1 on invalid arguments or if a node initializer
 * reports failure.
 */
int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen )
{
  size_t leaf;

  if( outlen == 0 || outlen > BLAKE2S_OUTBYTES )
    return -1;

  if( key == NULL || keylen == 0 || keylen > BLAKE2S_KEYBYTES )
    return -1;

  /* Start with an empty, zeroed input buffer. */
  S->buflen = 0;
  memset( S->buf, 0, sizeof S->buf );

  if( blake2sp_init_root( S->R, ( uint8_t ) outlen, ( uint8_t ) keylen ) < 0 )
    return -1;

  /* Leaf number doubles as its node offset. */
  for( leaf = 0; leaf < PARALLELISM_DEGREE; ++leaf )
  {
    if( blake2sp_init_leaf( S->S[leaf], ( uint8_t ) outlen, ( uint8_t ) keylen, leaf ) < 0 )
      return -1;
  }

  S->outlen = ( uint8_t ) outlen;
  S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
  S->R->last_node = 1;

  /* Zero-padded key block, absorbed as the first block of every leaf. */
  uint8_t keyblock[BLAKE2S_BLOCKBYTES] = { 0 };
  memcpy( keyblock, key, keylen );

  for( leaf = 0; leaf < PARALLELISM_DEGREE; ++leaf )
    blake2s_update( S->S[leaf], keyblock, BLAKE2S_BLOCKBYTES );

  /* Do not leave key material behind on the stack. */
  secure_zero_memory( keyblock, BLAKE2S_BLOCKBYTES );
  return 0;
}
|
|
|
|
|
|
/*
 * Absorb inlen bytes from in into a BLAKE2sp streaming state.
 *
 * Input is consumed in stripes of PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES
 * bytes; block k of every stripe goes to leaf k.  Bytes that do not fill
 * a whole stripe are kept in S->buf for a later call (or for
 * blake2sp_final()).
 *
 * When built with OpenMP the per-leaf work runs inside a parallel region
 * requested with PARALLELISM_DEGREE threads, each thread taking the leaf
 * matching its thread number; otherwise a plain loop visits the leaves.
 *
 * Always returns 0; results of the underlying blake2s_update() calls are
 * not inspected.
 */
int blake2sp_update( blake2sp_state *S, const uint8_t *in, size_t inlen )
{
  const size_t stripe = PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
  size_t buffered = S->buflen;
  const size_t room = sizeof( S->buf ) - buffered;

  /* Top up a partially filled buffer and flush it as one full stripe. */
  if( buffered != 0 && inlen >= room )
  {
    size_t lane;

    memcpy( S->buf + buffered, in, room );

    for( lane = 0; lane < PARALLELISM_DEGREE; ++lane )
      blake2s_update( S->S[lane], S->buf + lane * BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES );

    in += room;
    inlen -= room;
    buffered = 0;
  }

  /* Feed every complete stripe straight from the caller's memory. */
#if defined(_OPENMP)
  omp_set_num_threads(PARALLELISM_DEGREE);
  #pragma omp parallel shared(S)
#else
  for( size_t lane = 0; lane < PARALLELISM_DEGREE; ++lane )
#endif
  {
#if defined(_OPENMP)
    size_t lane = ( size_t ) omp_get_thread_num();
#endif
    /* This lane's block inside the current stripe. */
    const uint8_t *cursor = ( const uint8_t * )in + lane * BLAKE2S_BLOCKBYTES;
    size_t remaining;

    for( remaining = inlen; remaining >= stripe; remaining -= stripe )
    {
      blake2s_update( S->S[lane], cursor, BLAKE2S_BLOCKBYTES );
      cursor += stripe;
    }
  }

  /* Skip what the lanes consumed; only the sub-stripe tail is left. */
  {
    const size_t tail = inlen % stripe;
    in += inlen - tail;
    inlen = tail;
  }

  if( inlen != 0 )
    memcpy( S->buf + buffered, in, inlen );

  S->buflen = ( uint32_t ) buffered + ( uint32_t ) inlen;
  return 0;
}
|
|
|
|
|
|
/*
 * Finish a BLAKE2sp streaming computation and write the digest to out.
 *
 * S       state previously set up by blake2sp_init()/blake2sp_init_key()
 * out     receives outlen digest bytes
 * outlen  must equal the digest length the state was initialized with
 *
 * Any bytes still held in S->buf are handed out to the leaves in
 * BLAKE2S_BLOCKBYTES-sized pieces (leaf i gets piece i, the last piece
 * may be short), every leaf is finalized to a BLAKE2S_OUTBYTES digest,
 * and those digests are absorbed in order by the root.
 *
 * Returns -1 if outlen does not match the state; otherwise returns the
 * result of finalizing the root, so that a failure there is reported to
 * the caller instead of being masked by an unconditional 0 (this matches
 * what the one-shot blake2sp() does).
 */
int blake2sp_final( blake2sp_state *S, uint8_t *out, size_t outlen )
{
  uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES];

  if( S->outlen != outlen ) return -1;

  for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
  {
    /* Does the buffered tail reach into leaf i's block? */
    if( S->buflen > i * BLAKE2S_BLOCKBYTES )
    {
      size_t left = S->buflen - i * BLAKE2S_BLOCKBYTES;

      if( left > BLAKE2S_BLOCKBYTES ) left = BLAKE2S_BLOCKBYTES;

      blake2s_update( S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, left );
    }

    blake2s_final( S->S[i], hash[i], BLAKE2S_OUTBYTES );
  }

  /* The root hashes the concatenation of the leaf digests. */
  for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
    blake2s_update( S->R, hash[i], BLAKE2S_OUTBYTES );

  /* Propagate the root's status rather than always claiming success. */
  return blake2s_final( S->R, out, outlen );
}
|
|
|
|
|
|
int blake2sp( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
|
|
{
|
|
uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES];
|
|
blake2s_state S[PARALLELISM_DEGREE][1];
|
|
blake2s_state FS[1];
|
|
|
|
/* Verify parameters */
|
|
if ( NULL == in && inlen > 0 ) return -1;
|
|
|
|
if ( NULL == out ) return -1;
|
|
|
|
if ( NULL == key && keylen > 0 ) return -1;
|
|
|
|
if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
|
|
|
|
if( keylen > BLAKE2S_KEYBYTES ) return -1;
|
|
|
|
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
|
|
if( blake2sp_init_leaf( S[i], ( uint8_t ) outlen, ( uint8_t ) keylen, i ) < 0 )
|
|
return -1;
|
|
|
|
S[PARALLELISM_DEGREE - 1]->last_node = 1; // mark last node
|
|
|
|
if( keylen > 0 )
|
|
{
|
|
uint8_t block[BLAKE2S_BLOCKBYTES];
|
|
memset( block, 0, BLAKE2S_BLOCKBYTES );
|
|
memcpy( block, key, keylen );
|
|
|
|
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
|
|
blake2s_update( S[i], block, BLAKE2S_BLOCKBYTES );
|
|
|
|
secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
|
|
}
|
|
|
|
#if defined(_OPENMP)
|
|
omp_set_num_threads(PARALLELISM_DEGREE);
|
|
#pragma omp parallel shared(S,hash)
|
|
#else
|
|
|
|
for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ )
|
|
#endif
|
|
{
|
|
#if defined(_OPENMP)
|
|
size_t id__ = ( size_t ) omp_get_thread_num();
|
|
#endif
|
|
size_t inlen__ = inlen;
|
|
const uint8_t *in__ = ( const uint8_t * )in;
|
|
in__ += id__ * BLAKE2S_BLOCKBYTES;
|
|
|
|
while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES )
|
|
{
|
|
blake2s_update( S[id__], in__, BLAKE2S_BLOCKBYTES );
|
|
in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
|
|
inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
|
|
}
|
|
|
|
if( inlen__ > id__ * BLAKE2S_BLOCKBYTES )
|
|
{
|
|
const size_t left = inlen__ - id__ * BLAKE2S_BLOCKBYTES;
|
|
const size_t len = left <= BLAKE2S_BLOCKBYTES ? left : BLAKE2S_BLOCKBYTES;
|
|
blake2s_update( S[id__], in__, len );
|
|
}
|
|
|
|
blake2s_final( S[id__], hash[id__], BLAKE2S_OUTBYTES );
|
|
}
|
|
|
|
if( blake2sp_init_root( FS, ( uint8_t ) outlen, ( uint8_t ) keylen ) < 0 )
|
|
return -1;
|
|
|
|
FS->last_node = 1;
|
|
|
|
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
|
|
blake2s_update( FS, hash[i], BLAKE2S_OUTBYTES );
|
|
|
|
return blake2s_final( FS, out, outlen );
|
|
}
|
|
|
|
|
|
|
|
|