Import Blake2 algorithms (blake2b, blake2s) from libb2

The upstream repository is on github BLAKE2/libb2.  Files landed in
sys/contrib/libb2 are the unmodified upstream files, except for one
difference:  secure_zero_memory's contents have been replaced with
explicit_bzero() only because the previous implementation broke powerpc
link.  Preferential use of explicit_bzero() is in progress upstream, so
it is anticipated we will be able to drop this diff in the future.

sys/crypto/blake2 contains the source files needed to port libb2 to our
build system, a wrapped (limited) variant of the algorithm to match the API
of our auth_transform softcrypto abstraction, incorporation into the Open
Crypto Framework (OCF) cryptosoft(4) driver, as well as an x86 SSE/AVX
accelerated OCF driver, blake2(4).

Optimized variants of blake2 are compiled for a number of x86 machines
(anything from SSE2 to AVX + XOP).  On those machines, FPU context will need
to be explicitly saved before using blake2(4)-provided algorithms directly.
Use via cryptodev / OCF saves FPU state automatically, and use via the
auth_transform softcrypto abstraction does not use FPU.

The intent of the OCF driver is mostly to enable testing in userspace via
/dev/crypto.  ATF tests are added with published KAT test vectors to
validate correctness.

Reviewed by:	jhb, markj
Obtained from:	github BLAKE2/libb2
Differential Revision:	https://reviews.freebsd.org/D14662
This commit is contained in:
cem 2018-03-21 16:18:14 +00:00
parent 6564a13bd2
commit f5c5ebb133
47 changed files with 21959 additions and 1 deletions

View File

@ -648,6 +648,13 @@ contrib/zstd/lib/compress/zstd_ldm.c standard compile-with ${ZSTD_C}
contrib/zstd/lib/compress/zstd_opt.c standard compile-with ${ZSTD_C}
contrib/zstd/lib/decompress/zstd_decompress.c standard compile-with ${ZSTD_C}
contrib/zstd/lib/decompress/huf_decompress.c standard compile-with ${ZSTD_C}
# Blake 2
contrib/libb2/blake2b-ref.c optional crypto | ipsec | ipsec_support \
compile-with "${NORMAL_C} -I$S/crypto/blake2 ${NO_WCAST_QUAL} -DSUFFIX=_ref"
contrib/libb2/blake2s-ref.c optional crypto | ipsec | ipsec_support \
compile-with "${NORMAL_C} -I$S/crypto/blake2 ${NO_WCAST_QUAL} -DSUFFIX=_ref"
crypto/blake2/blake2-sw.c optional crypto | ipsec | ipsec_support \
compile-with "${NORMAL_C} -I$S/crypto/blake2 ${NO_WCAST_QUAL}"
crypto/blowfish/bf_ecb.c optional ipsec | ipsec_support
crypto/blowfish/bf_skey.c optional crypto | ipsec | ipsec_support
crypto/camellia/camellia.c optional crypto | ipsec | ipsec_support

View File

@ -0,0 +1,71 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#pragma once
#ifndef __BLAKE2_CONFIG_H__
#define __BLAKE2_CONFIG_H__
#if defined(__SSE2__)
#define HAVE_SSE2
#endif
#if defined(__SSSE3__)
#define HAVE_SSSE3
#endif
#if defined(__SSE4_1__)
#define HAVE_SSE4_1
#endif
#if defined(__AVX__)
#define HAVE_AVX
#endif
#if defined(__XOP__)
#define HAVE_XOP
#endif
#ifdef HAVE_AVX2
#ifndef HAVE_AVX
#define HAVE_AVX
#endif
#endif
#ifdef HAVE_XOP
#ifndef HAVE_AVX
#define HAVE_AVX
#endif
#endif
#ifdef HAVE_AVX
#ifndef HAVE_SSE4_1
#define HAVE_SSE4_1
#endif
#endif
#ifdef HAVE_SSE41
#ifndef HAVE_SSSE3
#define HAVE_SSSE3
#endif
#endif
#ifdef HAVE_SSSE3
#define HAVE_SSE2
#endif
#if !defined(HAVE_SSE2)
#error "This code requires at least SSE2."
#endif
#endif

View File

@ -0,0 +1,577 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include <stdio.h>
#if defined(WIN32)
#include <windows.h>
#endif
#include "blake2.h"
#if defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
#define HAVE_X86
#endif
typedef enum
{
NONE = 0,
#if defined(HAVE_X86)
SSE2 = 1,
SSSE3 = 2,
SSE41 = 3,
AVX = 4,
XOP = 5,
/* AVX2 = 6, */
#endif
} cpu_feature_t;
static const char feature_names[][8] =
{
"none",
#if defined(HAVE_X86)
"sse2",
"ssse3",
"sse41",
"avx",
"xop",
/* "avx2" */
#endif
};
#if defined(HAVE_X86)
#if defined(__GNUC__)
static inline void cpuid( uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx )
{
__asm__ __volatile__(
#if defined(__i386__) /* This is needed for -fPIC to work on i386 */
"movl %%ebx, %%esi\n\t"
#endif
"cpuid\n\t"
#if defined(__i386__)
"xchgl %%ebx, %%esi\n\t"
: "=a"( *eax ), "=S"( *ebx ), "=c"( *ecx ), "=d"( *edx ) : "a"( *eax ) );
#else
: "=a"( *eax ), "=b"( *ebx ), "=c"( *ecx ), "=d"( *edx ) : "a"( *eax ) );
#endif
}
static inline uint64_t xgetbv(uint32_t xcr)
{
uint32_t a, d;
__asm__ __volatile__(
"xgetbv"
: "=a"(a),"=d"(d)
: "c"(xcr)
);
return ((uint64_t)d << 32) | a;
}
#elif defined(_MSC_VER)
#include <intrin.h>
static inline void cpuid( uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx )
{
int regs[4];
__cpuid( regs, *eax );
*eax = regs[0];
*ebx = regs[1];
*ecx = regs[2];
*edx = regs[3];
}
#else
#error "Don't know how to call cpuid on this compiler!"
#endif
#endif /* HAVE_X86 */
static inline cpu_feature_t get_cpu_features( void )
{
#if defined(HAVE_X86)
static volatile int initialized = 0;
static cpu_feature_t feature = NONE; // Safe default
uint32_t eax, ecx, edx, ebx;
if( initialized )
return feature;
eax = 1;
cpuid( &eax, &ebx, &ecx, &edx );
if( 1 & ( edx >> 26 ) )
feature = SSE2;
if( 1 & ( ecx >> 9 ) )
feature = SSSE3;
if( 1 & ( ecx >> 19 ) )
feature = SSE41;
#if defined(WIN32) /* Work around the fact that Windows <7 does NOT support AVX... */
if( IsProcessorFeaturePresent(17) ) /* Some environments don't know about PF_XSAVE_ENABLED */
#endif
{
/* check for AVX and OSXSAVE bits */
if( 1 & ( ecx >> 28 ) & (ecx >> 27) ) {
#if !defined(WIN32) /* Already checked for this in WIN32 */
if( (xgetbv(0) & 6) == 6 ) /* XCR0 */
#endif
feature = AVX;
}
eax = 0x80000001;
cpuid( &eax, &ebx, &ecx, &edx );
if( 1 & ( ecx >> 11 ) )
feature = XOP;
}
/* For future architectures */
/*
eax = 7; ecx = 0;
cpuid(&eax, &ebx, &ecx, &edx);
if(1&(ebx >> 5))
feature = AVX2;
*/
/* fprintf( stderr, "Using %s engine\n", feature_names[feature] ); */
initialized = 1;
return feature;
#else
return NONE;
#endif
}
#if defined(__cplusplus)
extern "C" {
#endif
int blake2b_init_ref( blake2b_state *S, size_t outlen );
int blake2b_init_key_ref( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
int blake2b_init_param_ref( blake2b_state *S, const blake2b_param *P );
int blake2b_update_ref( blake2b_state *S, const uint8_t *in, size_t inlen );
int blake2b_final_ref( blake2b_state *S, uint8_t *out, size_t outlen );
int blake2b_ref( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
#if defined(HAVE_X86)
int blake2b_init_sse2( blake2b_state *S, size_t outlen );
int blake2b_init_key_sse2( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
int blake2b_init_param_sse2( blake2b_state *S, const blake2b_param *P );
int blake2b_update_sse2( blake2b_state *S, const uint8_t *in, size_t inlen );
int blake2b_final_sse2( blake2b_state *S, uint8_t *out, size_t outlen );
int blake2b_sse2( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
int blake2b_init_ssse3( blake2b_state *S, size_t outlen );
int blake2b_init_key_ssse3( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
int blake2b_init_param_ssse3( blake2b_state *S, const blake2b_param *P );
int blake2b_update_ssse3( blake2b_state *S, const uint8_t *in, size_t inlen );
int blake2b_final_ssse3( blake2b_state *S, uint8_t *out, size_t outlen );
int blake2b_ssse3( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
int blake2b_init_sse41( blake2b_state *S, size_t outlen );
int blake2b_init_key_sse41( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
int blake2b_init_param_sse41( blake2b_state *S, const blake2b_param *P );
int blake2b_update_sse41( blake2b_state *S, const uint8_t *in, size_t inlen );
int blake2b_final_sse41( blake2b_state *S, uint8_t *out, size_t outlen );
int blake2b_sse41( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
int blake2b_init_avx( blake2b_state *S, size_t outlen );
int blake2b_init_key_avx( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
int blake2b_init_param_avx( blake2b_state *S, const blake2b_param *P );
int blake2b_update_avx( blake2b_state *S, const uint8_t *in, size_t inlen );
int blake2b_final_avx( blake2b_state *S, uint8_t *out, size_t outlen );
int blake2b_avx( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
int blake2b_init_xop( blake2b_state *S, size_t outlen );
int blake2b_init_key_xop( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
int blake2b_init_param_xop( blake2b_state *S, const blake2b_param *P );
int blake2b_update_xop( blake2b_state *S, const uint8_t *in, size_t inlen );
int blake2b_final_xop( blake2b_state *S, uint8_t *out, size_t outlen );
int blake2b_xop( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
#endif /* HAVE_X86 */
int blake2s_init_ref( blake2s_state *S, size_t outlen );
int blake2s_init_key_ref( blake2s_state *S, size_t outlen, const void *key, size_t keylen );
int blake2s_init_param_ref( blake2s_state *S, const blake2s_param *P );
int blake2s_update_ref( blake2s_state *S, const uint8_t *in, size_t inlen );
int blake2s_final_ref( blake2s_state *S, uint8_t *out, size_t outlen );
int blake2s_ref( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
#if defined(HAVE_X86)
int blake2s_init_sse2( blake2s_state *S, size_t outlen );
int blake2s_init_key_sse2( blake2s_state *S, size_t outlen, const void *key, size_t keylen );
int blake2s_init_param_sse2( blake2s_state *S, const blake2s_param *P );
int blake2s_update_sse2( blake2s_state *S, const uint8_t *in, size_t inlen );
int blake2s_final_sse2( blake2s_state *S, uint8_t *out, size_t outlen );
int blake2s_sse2( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
int blake2s_init_ssse3( blake2s_state *S, size_t outlen );
int blake2s_init_key_ssse3( blake2s_state *S, size_t outlen, const void *key, size_t keylen );
int blake2s_init_param_ssse3( blake2s_state *S, const blake2s_param *P );
int blake2s_update_ssse3( blake2s_state *S, const uint8_t *in, size_t inlen );
int blake2s_final_ssse3( blake2s_state *S, uint8_t *out, size_t outlen );
int blake2s_ssse3( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
int blake2s_init_sse41( blake2s_state *S, size_t outlen );
int blake2s_init_key_sse41( blake2s_state *S, size_t outlen, const void *key, size_t keylen );
int blake2s_init_param_sse41( blake2s_state *S, const blake2s_param *P );
int blake2s_update_sse41( blake2s_state *S, const uint8_t *in, size_t inlen );
int blake2s_final_sse41( blake2s_state *S, uint8_t *out, size_t outlen );
int blake2s_sse41( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
int blake2s_init_avx( blake2s_state *S, size_t outlen );
int blake2s_init_key_avx( blake2s_state *S, size_t outlen, const void *key, size_t keylen );
int blake2s_init_param_avx( blake2s_state *S, const blake2s_param *P );
int blake2s_update_avx( blake2s_state *S, const uint8_t *in, size_t inlen );
int blake2s_final_avx( blake2s_state *S, uint8_t *out, size_t outlen );
int blake2s_avx( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
int blake2s_init_xop( blake2s_state *S, size_t outlen );
int blake2s_init_key_xop( blake2s_state *S, size_t outlen, const void *key, size_t keylen );
int blake2s_init_param_xop( blake2s_state *S, const blake2s_param *P );
int blake2s_update_xop( blake2s_state *S, const uint8_t *in, size_t inlen );
int blake2s_final_xop( blake2s_state *S, uint8_t *out, size_t outlen );
int blake2s_xop( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
#endif /* HAVE_X86 */
#if defined(__cplusplus)
}
#endif
typedef int ( *blake2b_init_fn )( blake2b_state *, size_t );
typedef int ( *blake2b_init_key_fn )( blake2b_state *, size_t, const void *, size_t );
typedef int ( *blake2b_init_param_fn )( blake2b_state *, const blake2b_param * );
typedef int ( *blake2b_update_fn )( blake2b_state *, const uint8_t *, size_t );
typedef int ( *blake2b_final_fn )( blake2b_state *, uint8_t *, size_t );
typedef int ( *blake2b_fn )( uint8_t *, const void *, const void *, size_t, size_t, size_t );
typedef int ( *blake2s_init_fn )( blake2s_state *, size_t );
typedef int ( *blake2s_init_key_fn )( blake2s_state *, size_t, const void *, size_t );
typedef int ( *blake2s_init_param_fn )( blake2s_state *, const blake2s_param * );
typedef int ( *blake2s_update_fn )( blake2s_state *, const uint8_t *, size_t );
typedef int ( *blake2s_final_fn )( blake2s_state *, uint8_t *, size_t );
typedef int ( *blake2s_fn )( uint8_t *, const void *, const void *, size_t, size_t, size_t );
static const blake2b_init_fn blake2b_init_table[] =
{
blake2b_init_ref,
#if defined(HAVE_X86)
blake2b_init_sse2,
blake2b_init_ssse3,
blake2b_init_sse41,
blake2b_init_avx,
blake2b_init_xop
#endif
};
static const blake2b_init_key_fn blake2b_init_key_table[] =
{
blake2b_init_key_ref,
#if defined(HAVE_X86)
blake2b_init_key_sse2,
blake2b_init_key_ssse3,
blake2b_init_key_sse41,
blake2b_init_key_avx,
blake2b_init_key_xop
#endif
};
static const blake2b_init_param_fn blake2b_init_param_table[] =
{
blake2b_init_param_ref,
#if defined(HAVE_X86)
blake2b_init_param_sse2,
blake2b_init_param_ssse3,
blake2b_init_param_sse41,
blake2b_init_param_avx,
blake2b_init_param_xop
#endif
};
static const blake2b_update_fn blake2b_update_table[] =
{
blake2b_update_ref,
#if defined(HAVE_X86)
blake2b_update_sse2,
blake2b_update_ssse3,
blake2b_update_sse41,
blake2b_update_avx,
blake2b_update_xop
#endif
};
static const blake2b_final_fn blake2b_final_table[] =
{
blake2b_final_ref,
#if defined(HAVE_X86)
blake2b_final_sse2,
blake2b_final_ssse3,
blake2b_final_sse41,
blake2b_final_avx,
blake2b_final_xop
#endif
};
static const blake2b_fn blake2b_table[] =
{
blake2b_ref,
#if defined(HAVE_X86)
blake2b_sse2,
blake2b_ssse3,
blake2b_sse41,
blake2b_avx,
blake2b_xop
#endif
};
static const blake2s_init_fn blake2s_init_table[] =
{
blake2s_init_ref,
#if defined(HAVE_X86)
blake2s_init_sse2,
blake2s_init_ssse3,
blake2s_init_sse41,
blake2s_init_avx,
blake2s_init_xop
#endif
};
static const blake2s_init_key_fn blake2s_init_key_table[] =
{
blake2s_init_key_ref,
#if defined(HAVE_X86)
blake2s_init_key_sse2,
blake2s_init_key_ssse3,
blake2s_init_key_sse41,
blake2s_init_key_avx,
blake2s_init_key_xop
#endif
};
static const blake2s_init_param_fn blake2s_init_param_table[] =
{
blake2s_init_param_ref,
#if defined(HAVE_X86)
blake2s_init_param_sse2,
blake2s_init_param_ssse3,
blake2s_init_param_sse41,
blake2s_init_param_avx,
blake2s_init_param_xop
#endif
};
static const blake2s_update_fn blake2s_update_table[] =
{
blake2s_update_ref,
#if defined(HAVE_X86)
blake2s_update_sse2,
blake2s_update_ssse3,
blake2s_update_sse41,
blake2s_update_avx,
blake2s_update_xop
#endif
};
static const blake2s_final_fn blake2s_final_table[] =
{
blake2s_final_ref,
#if defined(HAVE_X86)
blake2s_final_sse2,
blake2s_final_ssse3,
blake2s_final_sse41,
blake2s_final_avx,
blake2s_final_xop
#endif
};
static const blake2s_fn blake2s_table[] =
{
blake2s_ref,
#if defined(HAVE_X86)
blake2s_sse2,
blake2s_ssse3,
blake2s_sse41,
blake2s_avx,
blake2s_xop
#endif
};
#if defined(__cplusplus)
extern "C" {
#endif
int blake2b_init_dispatch( blake2b_state *S, size_t outlen );
int blake2b_init_key_dispatch( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
int blake2b_init_param_dispatch( blake2b_state *S, const blake2b_param *P );
int blake2b_update_dispatch( blake2b_state *S, const uint8_t *in, size_t inlen );
int blake2b_final_dispatch( blake2b_state *S, uint8_t *out, size_t outlen );
int blake2b_dispatch( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
int blake2s_init_dispatch( blake2s_state *S, size_t outlen );
int blake2s_init_key_dispatch( blake2s_state *S, size_t outlen, const void *key, size_t keylen );
int blake2s_init_param_dispatch( blake2s_state *S, const blake2s_param *P );
int blake2s_update_dispatch( blake2s_state *S, const uint8_t *in, size_t inlen );
int blake2s_final_dispatch( blake2s_state *S, uint8_t *out, size_t outlen );
int blake2s_dispatch( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
#if defined(__cplusplus)
}
#endif
static blake2b_init_fn blake2b_init_ptr = blake2b_init_dispatch;
static blake2b_init_key_fn blake2b_init_key_ptr = blake2b_init_key_dispatch;
static blake2b_init_param_fn blake2b_init_param_ptr = blake2b_init_param_dispatch;
static blake2b_update_fn blake2b_update_ptr = blake2b_update_dispatch;
static blake2b_final_fn blake2b_final_ptr = blake2b_final_dispatch;
static blake2b_fn blake2b_ptr = blake2b_dispatch;
static blake2s_init_fn blake2s_init_ptr = blake2s_init_dispatch;
static blake2s_init_key_fn blake2s_init_key_ptr = blake2s_init_key_dispatch;
static blake2s_init_param_fn blake2s_init_param_ptr = blake2s_init_param_dispatch;
static blake2s_update_fn blake2s_update_ptr = blake2s_update_dispatch;
static blake2s_final_fn blake2s_final_ptr = blake2s_final_dispatch;
static blake2s_fn blake2s_ptr = blake2s_dispatch;
int blake2b_init_dispatch( blake2b_state *S, size_t outlen )
{
blake2b_init_ptr = blake2b_init_table[get_cpu_features()];
return blake2b_init_ptr( S, outlen );
}
int blake2b_init_key_dispatch( blake2b_state *S, size_t outlen, const void *key, size_t keylen )
{
blake2b_init_key_ptr = blake2b_init_key_table[get_cpu_features()];
return blake2b_init_key_ptr( S, outlen, key, keylen );
}
int blake2b_init_param_dispatch( blake2b_state *S, const blake2b_param *P )
{
blake2b_init_param_ptr = blake2b_init_param_table[get_cpu_features()];
return blake2b_init_param_ptr( S, P );
}
int blake2b_update_dispatch( blake2b_state *S, const uint8_t *in, size_t inlen )
{
blake2b_update_ptr = blake2b_update_table[get_cpu_features()];
return blake2b_update_ptr( S, in, inlen );
}
int blake2b_final_dispatch( blake2b_state *S, uint8_t *out, size_t outlen )
{
blake2b_final_ptr = blake2b_final_table[get_cpu_features()];
return blake2b_final_ptr( S, out, outlen );
}
int blake2b_dispatch( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
{
blake2b_ptr = blake2b_table[get_cpu_features()];
return blake2b_ptr( out, in, key, outlen, inlen, keylen );
}
BLAKE2_API int blake2b_init( blake2b_state *S, size_t outlen )
{
return blake2b_init_ptr( S, outlen );
}
BLAKE2_API int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen )
{
return blake2b_init_key_ptr( S, outlen, key, keylen );
}
BLAKE2_API int blake2b_init_param( blake2b_state *S, const blake2b_param *P )
{
return blake2b_init_param_ptr( S, P );
}
BLAKE2_API int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen )
{
return blake2b_update_ptr( S, in, inlen );
}
BLAKE2_API int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen )
{
return blake2b_final_ptr( S, out, outlen );
}
BLAKE2_API int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
{
return blake2b_ptr( out, in, key, outlen, inlen, keylen );
}
int blake2s_init_dispatch( blake2s_state *S, size_t outlen )
{
blake2s_init_ptr = blake2s_init_table[get_cpu_features()];
return blake2s_init_ptr( S, outlen );
}
int blake2s_init_key_dispatch( blake2s_state *S, size_t outlen, const void *key, size_t keylen )
{
blake2s_init_key_ptr = blake2s_init_key_table[get_cpu_features()];
return blake2s_init_key_ptr( S, outlen, key, keylen );
}
int blake2s_init_param_dispatch( blake2s_state *S, const blake2s_param *P )
{
blake2s_init_param_ptr = blake2s_init_param_table[get_cpu_features()];
return blake2s_init_param_ptr( S, P );
}
int blake2s_update_dispatch( blake2s_state *S, const uint8_t *in, size_t inlen )
{
blake2s_update_ptr = blake2s_update_table[get_cpu_features()];
return blake2s_update_ptr( S, in, inlen );
}
int blake2s_final_dispatch( blake2s_state *S, uint8_t *out, size_t outlen )
{
blake2s_final_ptr = blake2s_final_table[get_cpu_features()];
return blake2s_final_ptr( S, out, outlen );
}
int blake2s_dispatch( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
{
blake2s_ptr = blake2s_table[get_cpu_features()];
return blake2s_ptr( out, in, key, outlen, inlen, keylen );
}
BLAKE2_API int blake2s_init( blake2s_state *S, size_t outlen )
{
return blake2s_init_ptr( S, outlen );
}
BLAKE2_API int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen )
{
return blake2s_init_key_ptr( S, outlen, key, keylen );
}
BLAKE2_API int blake2s_init_param( blake2s_state *S, const blake2s_param *P )
{
return blake2s_init_param_ptr( S, P );
}
BLAKE2_API int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen )
{
return blake2s_update_ptr( S, in, inlen );
}
BLAKE2_API int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen )
{
return blake2s_final_ptr( S, out, outlen );
}
BLAKE2_API int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
{
return blake2s_ptr( out, in, key, outlen, inlen, keylen );
}

View File

@ -0,0 +1,143 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#pragma once
#ifndef __BLAKE2_IMPL_H__
#define __BLAKE2_IMPL_H__
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include "config.h"
#define BLAKE2_IMPL_CAT(x,y) x ## y
#define BLAKE2_IMPL_EVAL(x,y) BLAKE2_IMPL_CAT(x,y)
#define BLAKE2_IMPL_NAME(fun) BLAKE2_IMPL_EVAL(fun, SUFFIX)
static inline uint32_t load32( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN) && !defined(HAVE_ALIGNED_ACCESS_REQUIRED)
return *( uint32_t * )( src );
#else
const uint8_t *p = ( uint8_t * )src;
uint32_t w = *p++;
w |= ( uint32_t )( *p++ ) << 8;
w |= ( uint32_t )( *p++ ) << 16;
w |= ( uint32_t )( *p++ ) << 24;
return w;
#endif
}
static inline uint64_t load64( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN) && !defined(HAVE_ALIGNED_ACCESS_REQUIRED)
return *( uint64_t * )( src );
#else
const uint8_t *p = ( uint8_t * )src;
uint64_t w = *p++;
w |= ( uint64_t )( *p++ ) << 8;
w |= ( uint64_t )( *p++ ) << 16;
w |= ( uint64_t )( *p++ ) << 24;
w |= ( uint64_t )( *p++ ) << 32;
w |= ( uint64_t )( *p++ ) << 40;
w |= ( uint64_t )( *p++ ) << 48;
w |= ( uint64_t )( *p++ ) << 56;
return w;
#endif
}
static inline void store32( void *dst, uint32_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN) && !defined(HAVE_ALIGNED_ACCESS_REQUIRED)
*( uint32_t * )( dst ) = w;
#else
uint8_t *p = ( uint8_t * )dst;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w;
#endif
}
static inline void store64( void *dst, uint64_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN) && !defined(HAVE_ALIGNED_ACCESS_REQUIRED)
*( uint64_t * )( dst ) = w;
#else
uint8_t *p = ( uint8_t * )dst;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w;
#endif
}
static inline uint64_t load48( const void *src )
{
const uint8_t *p = ( const uint8_t * )src;
uint64_t w = *p++;
w |= ( uint64_t )( *p++ ) << 8;
w |= ( uint64_t )( *p++ ) << 16;
w |= ( uint64_t )( *p++ ) << 24;
w |= ( uint64_t )( *p++ ) << 32;
w |= ( uint64_t )( *p++ ) << 40;
return w;
}
static inline void store48( void *dst, uint64_t w )
{
uint8_t *p = ( uint8_t * )dst;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w;
}
static inline uint32_t rotl32( const uint32_t w, const unsigned c )
{
return ( w << c ) | ( w >> ( 32 - c ) );
}
static inline uint64_t rotl64( const uint64_t w, const unsigned c )
{
return ( w << c ) | ( w >> ( 64 - c ) );
}
static inline uint32_t rotr32( const uint32_t w, const unsigned c )
{
return ( w >> c ) | ( w << ( 32 - c ) );
}
static inline uint64_t rotr64( const uint64_t w, const unsigned c )
{
return ( w >> c ) | ( w << ( 64 - c ) );
}
/* prevents compiler optimizing out memset() */
static inline void secure_zero_memory(void *v, size_t n)
{
#ifdef __FreeBSD__
explicit_bzero(v, n);
#else
static void *(*const volatile memset_v)(void *, int, size_t) = &memset;
memset_v(v, 0, n);
#endif
}
#endif

182
sys/contrib/libb2/blake2.h Normal file
View File

@ -0,0 +1,182 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#pragma once
#ifndef __BLAKE2_H__
#define __BLAKE2_H__
#include <stddef.h>
#include <stdint.h>
#if defined(_WIN32) || defined(__CYGWIN__)
#define BLAKE2_DLL_IMPORT __declspec(dllimport)
#define BLAKE2_DLL_EXPORT __declspec(dllexport)
#define BLAKE2_DLL_PRIVATE
#elif __GNUC__ >= 4
#define BLAKE2_DLL_IMPORT __attribute__ ((visibility ("default")))
#define BLAKE2_DLL_EXPORT __attribute__ ((visibility ("default")))
#define BLAKE2_DLL_PRIVATE __attribute__ ((visibility ("hidden")))
#else
#define BLAKE2_DLL_IMPORT
#define BLAKE2_DLL_EXPORT
#define BLAKE2_DLL_PRIVATE
#endif
#if defined(BLAKE2_DLL)
#if defined(BLAKE2_DLL_EXPORTS) // defined if we are building the DLL
#define BLAKE2_API BLAKE2_DLL_EXPORT
#else
#define BLAKE2_API BLAKE2_DLL_IMPORT
#endif
#define BLAKE2_PRIVATE BLAKE2_DLL_PRIVATE // must only be used by hidden logic
#else
#define BLAKE2_API
#define BLAKE2_PRIVATE
#endif
#if defined(__cplusplus)
extern "C" {
#elif defined(_MSC_VER) && !defined(inline)
#define inline __inline
#endif
enum blake2s_constant
{
BLAKE2S_BLOCKBYTES = 64,
BLAKE2S_OUTBYTES = 32,
BLAKE2S_KEYBYTES = 32,
BLAKE2S_SALTBYTES = 8,
BLAKE2S_PERSONALBYTES = 8
};
enum blake2b_constant
{
BLAKE2B_BLOCKBYTES = 128,
BLAKE2B_OUTBYTES = 64,
BLAKE2B_KEYBYTES = 64,
BLAKE2B_SALTBYTES = 16,
BLAKE2B_PERSONALBYTES = 16
};
#pragma pack(push, 1)
typedef struct __blake2s_param
{
uint8_t digest_length; // 1
uint8_t key_length; // 2
uint8_t fanout; // 3
uint8_t depth; // 4
uint32_t leaf_length; // 8
uint8_t node_offset[6];// 14
uint8_t node_depth; // 15
uint8_t inner_length; // 16
// uint8_t reserved[0];
uint8_t salt[BLAKE2S_SALTBYTES]; // 24
uint8_t personal[BLAKE2S_PERSONALBYTES]; // 32
} blake2s_param;
typedef struct __blake2s_state
{
uint32_t h[8];
uint32_t t[2];
uint32_t f[2];
uint8_t buf[2 * BLAKE2S_BLOCKBYTES];
uint32_t buflen;
uint8_t outlen;
uint8_t last_node;
} blake2s_state;
typedef struct __blake2b_param
{
uint8_t digest_length; // 1
uint8_t key_length; // 2
uint8_t fanout; // 3
uint8_t depth; // 4
uint32_t leaf_length; // 8
uint64_t node_offset; // 16
uint8_t node_depth; // 17
uint8_t inner_length; // 18
uint8_t reserved[14]; // 32
uint8_t salt[BLAKE2B_SALTBYTES]; // 48
uint8_t personal[BLAKE2B_PERSONALBYTES]; // 64
} blake2b_param;
typedef struct __blake2b_state
{
uint64_t h[8];
uint64_t t[2];
uint64_t f[2];
uint8_t buf[2 * BLAKE2B_BLOCKBYTES];
uint32_t buflen;
uint8_t outlen;
uint8_t last_node;
} blake2b_state;
typedef struct __blake2sp_state
{
blake2s_state S[8][1];
blake2s_state R[1];
uint8_t buf[8 * BLAKE2S_BLOCKBYTES];
uint32_t buflen;
uint8_t outlen;
} blake2sp_state;
typedef struct __blake2bp_state
{
blake2b_state S[4][1];
blake2b_state R[1];
uint8_t buf[4 * BLAKE2B_BLOCKBYTES];
uint32_t buflen;
uint8_t outlen;
} blake2bp_state;
#pragma pack(pop)
// Streaming API
BLAKE2_API int blake2s_init( blake2s_state *S, size_t outlen );
BLAKE2_API int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen );
BLAKE2_API int blake2s_init_param( blake2s_state *S, const blake2s_param *P );
BLAKE2_API int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen );
BLAKE2_API int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen );
BLAKE2_API int blake2b_init( blake2b_state *S, size_t outlen );
BLAKE2_API int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
BLAKE2_API int blake2b_init_param( blake2b_state *S, const blake2b_param *P );
BLAKE2_API int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen );
BLAKE2_API int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen );
BLAKE2_API int blake2sp_init( blake2sp_state *S, size_t outlen );
BLAKE2_API int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen );
BLAKE2_API int blake2sp_update( blake2sp_state *S, const uint8_t *in, size_t inlen );
BLAKE2_API int blake2sp_final( blake2sp_state *S, uint8_t *out, size_t outlen );
BLAKE2_API int blake2bp_init( blake2bp_state *S, size_t outlen );
BLAKE2_API int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen );
BLAKE2_API int blake2bp_update( blake2bp_state *S, const uint8_t *in, size_t inlen );
BLAKE2_API int blake2bp_final( blake2bp_state *S, uint8_t *out, size_t outlen );
// Simple API
BLAKE2_API int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
BLAKE2_API int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
BLAKE2_API int blake2sp( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
BLAKE2_API int blake2bp( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
static inline int blake2( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
{
return blake2b( out, in, key, outlen, inlen, keylen );
}
#if defined(__cplusplus)
}
#endif
#endif

View File

@ -0,0 +1,68 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#pragma once
#ifndef __BLAKE2B_LOAD_SSE2_H__
#define __BLAKE2B_LOAD_SSE2_H__
#define LOAD_MSG_0_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4)
#define LOAD_MSG_0_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5)
#define LOAD_MSG_0_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12)
#define LOAD_MSG_0_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13)
#define LOAD_MSG_1_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9)
#define LOAD_MSG_1_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15)
#define LOAD_MSG_1_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11)
#define LOAD_MSG_1_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7)
#define LOAD_MSG_2_1(b0, b1) b0 = _mm_set_epi64x(m12, m11); b1 = _mm_set_epi64x(m15, m5)
#define LOAD_MSG_2_2(b0, b1) b0 = _mm_set_epi64x(m0, m8); b1 = _mm_set_epi64x(m13, m2)
#define LOAD_MSG_2_3(b0, b1) b0 = _mm_set_epi64x(m3, m10); b1 = _mm_set_epi64x(m9, m7)
#define LOAD_MSG_2_4(b0, b1) b0 = _mm_set_epi64x(m6, m14); b1 = _mm_set_epi64x(m4, m1)
#define LOAD_MSG_3_1(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m13)
#define LOAD_MSG_3_2(b0, b1) b0 = _mm_set_epi64x(m1, m9); b1 = _mm_set_epi64x(m14, m12)
#define LOAD_MSG_3_3(b0, b1) b0 = _mm_set_epi64x(m5, m2); b1 = _mm_set_epi64x(m15, m4)
#define LOAD_MSG_3_4(b0, b1) b0 = _mm_set_epi64x(m10, m6); b1 = _mm_set_epi64x(m8, m0)
#define LOAD_MSG_4_1(b0, b1) b0 = _mm_set_epi64x(m5, m9); b1 = _mm_set_epi64x(m10, m2)
#define LOAD_MSG_4_2(b0, b1) b0 = _mm_set_epi64x(m7, m0); b1 = _mm_set_epi64x(m15, m4)
#define LOAD_MSG_4_3(b0, b1) b0 = _mm_set_epi64x(m11, m14); b1 = _mm_set_epi64x(m3, m6)
#define LOAD_MSG_4_4(b0, b1) b0 = _mm_set_epi64x(m12, m1); b1 = _mm_set_epi64x(m13, m8)
#define LOAD_MSG_5_1(b0, b1) b0 = _mm_set_epi64x(m6, m2); b1 = _mm_set_epi64x(m8, m0)
#define LOAD_MSG_5_2(b0, b1) b0 = _mm_set_epi64x(m10, m12); b1 = _mm_set_epi64x(m3, m11)
#define LOAD_MSG_5_3(b0, b1) b0 = _mm_set_epi64x(m7, m4); b1 = _mm_set_epi64x(m1, m15)
#define LOAD_MSG_5_4(b0, b1) b0 = _mm_set_epi64x(m5, m13); b1 = _mm_set_epi64x(m9, m14)
#define LOAD_MSG_6_1(b0, b1) b0 = _mm_set_epi64x(m1, m12); b1 = _mm_set_epi64x(m4, m14)
#define LOAD_MSG_6_2(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m10, m13)
#define LOAD_MSG_6_3(b0, b1) b0 = _mm_set_epi64x(m6, m0); b1 = _mm_set_epi64x(m8, m9)
#define LOAD_MSG_6_4(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m2)
#define LOAD_MSG_7_1(b0, b1) b0 = _mm_set_epi64x(m7, m13); b1 = _mm_set_epi64x(m3, m12)
#define LOAD_MSG_7_2(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m9, m1)
#define LOAD_MSG_7_3(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m2, m8)
#define LOAD_MSG_7_4(b0, b1) b0 = _mm_set_epi64x(m4, m0); b1 = _mm_set_epi64x(m10, m6)
#define LOAD_MSG_8_1(b0, b1) b0 = _mm_set_epi64x(m14, m6); b1 = _mm_set_epi64x(m0, m11)
#define LOAD_MSG_8_2(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m8, m3)
#define LOAD_MSG_8_3(b0, b1) b0 = _mm_set_epi64x(m13, m12); b1 = _mm_set_epi64x(m10, m1)
#define LOAD_MSG_8_4(b0, b1) b0 = _mm_set_epi64x(m7, m2); b1 = _mm_set_epi64x(m5, m4)
#define LOAD_MSG_9_1(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m1, m7)
#define LOAD_MSG_9_2(b0, b1) b0 = _mm_set_epi64x(m4, m2); b1 = _mm_set_epi64x(m5, m6)
#define LOAD_MSG_9_3(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m13, m3)
#define LOAD_MSG_9_4(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m0, m12)
#define LOAD_MSG_10_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4)
#define LOAD_MSG_10_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5)
#define LOAD_MSG_10_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12)
#define LOAD_MSG_10_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13)
#define LOAD_MSG_11_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9)
#define LOAD_MSG_11_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15)
#define LOAD_MSG_11_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11)
#define LOAD_MSG_11_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7)
#endif

View File

@ -0,0 +1,402 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#pragma once
#ifndef __BLAKE2B_LOAD_SSE41_H__
#define __BLAKE2B_LOAD_SSE41_H__
#define LOAD_MSG_0_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m0, m1); \
b1 = _mm_unpacklo_epi64(m2, m3); \
} while(0)
#define LOAD_MSG_0_2(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m0, m1); \
b1 = _mm_unpackhi_epi64(m2, m3); \
} while(0)
#define LOAD_MSG_0_3(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m4, m5); \
b1 = _mm_unpacklo_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_0_4(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m4, m5); \
b1 = _mm_unpackhi_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_1_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m7, m2); \
b1 = _mm_unpackhi_epi64(m4, m6); \
} while(0)
#define LOAD_MSG_1_2(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m5, m4); \
b1 = _mm_alignr_epi8(m3, m7, 8); \
} while(0)
#define LOAD_MSG_1_3(b0, b1) \
do \
{ \
b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
b1 = _mm_unpackhi_epi64(m5, m2); \
} while(0)
#define LOAD_MSG_1_4(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m6, m1); \
b1 = _mm_unpackhi_epi64(m3, m1); \
} while(0)
#define LOAD_MSG_2_1(b0, b1) \
do \
{ \
b0 = _mm_alignr_epi8(m6, m5, 8); \
b1 = _mm_unpackhi_epi64(m2, m7); \
} while(0)
#define LOAD_MSG_2_2(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m4, m0); \
b1 = _mm_blend_epi16(m1, m6, 0xF0); \
} while(0)
#define LOAD_MSG_2_3(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m5, m1, 0xF0); \
b1 = _mm_unpackhi_epi64(m3, m4); \
} while(0)
#define LOAD_MSG_2_4(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m7, m3); \
b1 = _mm_alignr_epi8(m2, m0, 8); \
} while(0)
#define LOAD_MSG_3_1(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m3, m1); \
b1 = _mm_unpackhi_epi64(m6, m5); \
} while(0)
#define LOAD_MSG_3_2(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m4, m0); \
b1 = _mm_unpacklo_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_3_3(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m1, m2, 0xF0); \
b1 = _mm_blend_epi16(m2, m7, 0xF0); \
} while(0)
#define LOAD_MSG_3_4(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m3, m5); \
b1 = _mm_unpacklo_epi64(m0, m4); \
} while(0)
#define LOAD_MSG_4_1(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m4, m2); \
b1 = _mm_unpacklo_epi64(m1, m5); \
} while(0)
#define LOAD_MSG_4_2(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m0, m3, 0xF0); \
b1 = _mm_blend_epi16(m2, m7, 0xF0); \
} while(0)
#define LOAD_MSG_4_3(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m7, m5, 0xF0); \
b1 = _mm_blend_epi16(m3, m1, 0xF0); \
} while(0)
#define LOAD_MSG_4_4(b0, b1) \
do \
{ \
b0 = _mm_alignr_epi8(m6, m0, 8); \
b1 = _mm_blend_epi16(m4, m6, 0xF0); \
} while(0)
#define LOAD_MSG_5_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m1, m3); \
b1 = _mm_unpacklo_epi64(m0, m4); \
} while(0)
#define LOAD_MSG_5_2(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m6, m5); \
b1 = _mm_unpackhi_epi64(m5, m1); \
} while(0)
#define LOAD_MSG_5_3(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m2, m3, 0xF0); \
b1 = _mm_unpackhi_epi64(m7, m0); \
} while(0)
#define LOAD_MSG_5_4(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m6, m2); \
b1 = _mm_blend_epi16(m7, m4, 0xF0); \
} while(0)
#define LOAD_MSG_6_1(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m6, m0, 0xF0); \
b1 = _mm_unpacklo_epi64(m7, m2); \
} while(0)
#define LOAD_MSG_6_2(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m2, m7); \
b1 = _mm_alignr_epi8(m5, m6, 8); \
} while(0)
#define LOAD_MSG_6_3(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m0, m3); \
b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \
} while(0)
#define LOAD_MSG_6_4(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m3, m1); \
b1 = _mm_blend_epi16(m1, m5, 0xF0); \
} while(0)
#define LOAD_MSG_7_1(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m6, m3); \
b1 = _mm_blend_epi16(m6, m1, 0xF0); \
} while(0)
#define LOAD_MSG_7_2(b0, b1) \
do \
{ \
b0 = _mm_alignr_epi8(m7, m5, 8); \
b1 = _mm_unpackhi_epi64(m0, m4); \
} while(0)
#define LOAD_MSG_7_3(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m2, m7); \
b1 = _mm_unpacklo_epi64(m4, m1); \
} while(0)
#define LOAD_MSG_7_4(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m0, m2); \
b1 = _mm_unpacklo_epi64(m3, m5); \
} while(0)
#define LOAD_MSG_8_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m3, m7); \
b1 = _mm_alignr_epi8(m0, m5, 8); \
} while(0)
#define LOAD_MSG_8_2(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m7, m4); \
b1 = _mm_alignr_epi8(m4, m1, 8); \
} while(0)
#define LOAD_MSG_8_3(b0, b1) \
do \
{ \
b0 = m6; \
b1 = _mm_alignr_epi8(m5, m0, 8); \
} while(0)
#define LOAD_MSG_8_4(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m1, m3, 0xF0); \
b1 = m2; \
} while(0)
#define LOAD_MSG_9_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m5, m4); \
b1 = _mm_unpackhi_epi64(m3, m0); \
} while(0)
#define LOAD_MSG_9_2(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m1, m2); \
b1 = _mm_blend_epi16(m3, m2, 0xF0); \
} while(0)
#define LOAD_MSG_9_3(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m7, m4); \
b1 = _mm_unpackhi_epi64(m1, m6); \
} while(0)
#define LOAD_MSG_9_4(b0, b1) \
do \
{ \
b0 = _mm_alignr_epi8(m7, m5, 8); \
b1 = _mm_unpacklo_epi64(m6, m0); \
} while(0)
#define LOAD_MSG_10_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m0, m1); \
b1 = _mm_unpacklo_epi64(m2, m3); \
} while(0)
#define LOAD_MSG_10_2(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m0, m1); \
b1 = _mm_unpackhi_epi64(m2, m3); \
} while(0)
#define LOAD_MSG_10_3(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m4, m5); \
b1 = _mm_unpacklo_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_10_4(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m4, m5); \
b1 = _mm_unpackhi_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_11_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m7, m2); \
b1 = _mm_unpackhi_epi64(m4, m6); \
} while(0)
#define LOAD_MSG_11_2(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m5, m4); \
b1 = _mm_alignr_epi8(m3, m7, 8); \
} while(0)
#define LOAD_MSG_11_3(b0, b1) \
do \
{ \
b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
b1 = _mm_unpackhi_epi64(m5, m2); \
} while(0)
#define LOAD_MSG_11_4(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m6, m1); \
b1 = _mm_unpackhi_epi64(m3, m1); \
} while(0)
#endif

View File

@ -0,0 +1,386 @@
/*
BLAKE2 reference source code package - reference C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "blake2.h"
#include "blake2-impl.h"
static const uint64_t blake2b_IV[8] =
{
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
};
static const uint8_t blake2b_sigma[12][16] =
{
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
};
static inline int blake2b_set_lastnode( blake2b_state *S )
{
S->f[1] = ~0ULL;
return 0;
}
static inline int blake2b_clear_lastnode( blake2b_state *S )
{
S->f[1] = 0ULL;
return 0;
}
/* Some helper functions, not necessarily useful */
static inline int blake2b_set_lastblock( blake2b_state *S )
{
if( S->last_node ) blake2b_set_lastnode( S );
S->f[0] = ~0ULL;
return 0;
}
static inline int blake2b_clear_lastblock( blake2b_state *S )
{
if( S->last_node ) blake2b_clear_lastnode( S );
S->f[0] = 0ULL;
return 0;
}
static inline int blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
{
S->t[0] += inc;
S->t[1] += ( S->t[0] < inc );
return 0;
}
// Parameter-related functions
static inline int blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length )
{
P->digest_length = digest_length;
return 0;
}
static inline int blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout )
{
P->fanout = fanout;
return 0;
}
static inline int blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth )
{
P->depth = depth;
return 0;
}
static inline int blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length )
{
store32( &P->leaf_length, leaf_length );
return 0;
}
static inline int blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset )
{
store64( &P->node_offset, node_offset );
return 0;
}
static inline int blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth )
{
P->node_depth = node_depth;
return 0;
}
static inline int blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length )
{
P->inner_length = inner_length;
return 0;
}
static inline int blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] )
{
memcpy( P->salt, salt, BLAKE2B_SALTBYTES );
return 0;
}
static inline int blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] )
{
memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES );
return 0;
}
static inline int blake2b_init0( blake2b_state *S )
{
memset( S, 0, sizeof( blake2b_state ) );
for( int i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i];
return 0;
}
#define blake2b_init BLAKE2_IMPL_NAME(blake2b_init)
#define blake2b_init_param BLAKE2_IMPL_NAME(blake2b_init_param)
#define blake2b_init_key BLAKE2_IMPL_NAME(blake2b_init_key)
#define blake2b_update BLAKE2_IMPL_NAME(blake2b_update)
#define blake2b_final BLAKE2_IMPL_NAME(blake2b_final)
#define blake2b BLAKE2_IMPL_NAME(blake2b)
#if defined(__cplusplus)
extern "C" {
#endif
int blake2b_init( blake2b_state *S, size_t outlen );
int blake2b_init_param( blake2b_state *S, const blake2b_param *P );
int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen );
int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen );
int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
#if defined(__cplusplus)
}
#endif
/* init xors IV with input parameter block */
int blake2b_init_param( blake2b_state *S, const blake2b_param *P )
{
blake2b_init0( S );
uint8_t *p = ( uint8_t * )( P );
/* IV XOR ParamBlock */
for( size_t i = 0; i < 8; ++i )
S->h[i] ^= load64( p + sizeof( S->h[i] ) * i );
S->outlen = P->digest_length;
return 0;
}
int blake2b_init( blake2b_state *S, size_t outlen )
{
blake2b_param P[1];
if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
P->digest_length = ( uint8_t ) outlen;
P->key_length = 0;
P->fanout = 1;
P->depth = 1;
store32( &P->leaf_length, 0 );
store64( &P->node_offset, 0 );
P->node_depth = 0;
P->inner_length = 0;
memset( P->reserved, 0, sizeof( P->reserved ) );
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
return blake2b_init_param( S, P );
}
int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen )
{
blake2b_param P[1];
if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
if ( !key || !keylen || keylen > BLAKE2B_KEYBYTES ) return -1;
P->digest_length = ( uint8_t ) outlen;
P->key_length = ( uint8_t ) keylen;
P->fanout = 1;
P->depth = 1;
store32( &P->leaf_length, 0 );
store64( &P->node_offset, 0 );
P->node_depth = 0;
P->inner_length = 0;
memset( P->reserved, 0, sizeof( P->reserved ) );
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
if( blake2b_init_param( S, P ) < 0 ) return -1;
{
uint8_t block[BLAKE2B_BLOCKBYTES];
memset( block, 0, BLAKE2B_BLOCKBYTES );
memcpy( block, key, keylen );
blake2b_update( S, block, BLAKE2B_BLOCKBYTES );
secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
}
return 0;
}
static int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
{
uint64_t m[16];
uint64_t v[16];
size_t i;
for( i = 0; i < 16; ++i )
m[i] = load64( block + i * sizeof( m[i] ) );
for( i = 0; i < 8; ++i )
v[i] = S->h[i];
v[ 8] = blake2b_IV[0];
v[ 9] = blake2b_IV[1];
v[10] = blake2b_IV[2];
v[11] = blake2b_IV[3];
v[12] = S->t[0] ^ blake2b_IV[4];
v[13] = S->t[1] ^ blake2b_IV[5];
v[14] = S->f[0] ^ blake2b_IV[6];
v[15] = S->f[1] ^ blake2b_IV[7];
#define G(r,i,a,b,c,d) \
do { \
a = a + b + m[blake2b_sigma[r][2*i+0]]; \
d = rotr64(d ^ a, 32); \
c = c + d; \
b = rotr64(b ^ c, 24); \
a = a + b + m[blake2b_sigma[r][2*i+1]]; \
d = rotr64(d ^ a, 16); \
c = c + d; \
b = rotr64(b ^ c, 63); \
} while(0)
#define ROUND(r) \
do { \
G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
G(r,2,v[ 2],v[ 6],v[10],v[14]); \
G(r,3,v[ 3],v[ 7],v[11],v[15]); \
G(r,4,v[ 0],v[ 5],v[10],v[15]); \
G(r,5,v[ 1],v[ 6],v[11],v[12]); \
G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
} while(0)
ROUND( 0 );
ROUND( 1 );
ROUND( 2 );
ROUND( 3 );
ROUND( 4 );
ROUND( 5 );
ROUND( 6 );
ROUND( 7 );
ROUND( 8 );
ROUND( 9 );
ROUND( 10 );
ROUND( 11 );
for( i = 0; i < 8; ++i )
S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
#undef G
#undef ROUND
return 0;
}
int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen )
{
while( inlen > 0 )
{
uint32_t left = S->buflen;
uint32_t fill = 2 * BLAKE2B_BLOCKBYTES - left;
if( inlen > fill )
{
memcpy( S->buf + left, in, fill ); // Fill buffer
S->buflen += fill;
blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
blake2b_compress( S, S->buf ); // Compress
memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); // Shift buffer left
S->buflen -= BLAKE2B_BLOCKBYTES;
in += fill;
inlen -= fill;
}
else // inlen <= fill
{
memcpy( S->buf + left, in, inlen );
S->buflen += ( uint32_t ) inlen; // Be lazy, do not compress
in += inlen;
inlen -= inlen;
}
}
return 0;
}
int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen )
{
uint8_t buffer[BLAKE2B_OUTBYTES];
size_t i;
if(S->outlen != outlen) return -1;
if( S->buflen > BLAKE2B_BLOCKBYTES )
{
blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
blake2b_compress( S, S->buf );
S->buflen -= BLAKE2B_BLOCKBYTES;
memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen );
}
blake2b_increment_counter( S, S->buflen );
blake2b_set_lastblock( S );
memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */
blake2b_compress( S, S->buf );
for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
store64( buffer + sizeof( S->h[i] ) * i, S->h[i] );
memcpy( out, buffer, outlen );
return 0;
}
int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
{
blake2b_state S[1];
/* Verify parameters */
if ( NULL == in && inlen > 0 ) return -1;
if ( NULL == out ) return -1;
if( NULL == key && keylen > 0 ) return -1;
if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
if( keylen > BLAKE2B_KEYBYTES ) return -1;
if( keylen > 0 )
{
if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1;
}
else
{
if( blake2b_init( S, outlen ) < 0 ) return -1;
}
if( blake2b_update( S, ( uint8_t * )in, inlen ) < 0 ) return -1;
return blake2b_final( S, out, outlen );
}

View File

@ -0,0 +1,160 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#pragma once
#ifndef __BLAKE2B_ROUND_H__
#define __BLAKE2B_ROUND_H__
#define LOAD(p) _mm_load_si128( (__m128i *)(p) )
#define STORE(p,r) _mm_store_si128((__m128i *)(p), r)
#define LOADU(p) _mm_loadu_si128( (__m128i *)(p) )
#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r)
#define TOF(reg) _mm_castsi128_ps((reg))
#define TOI(reg) _mm_castps_si128((reg))
#define LIKELY(x) __builtin_expect((x),1)
/* Microarchitecture-specific macros */
#ifndef HAVE_XOP
#ifdef HAVE_SSSE3
#define _mm_roti_epi64(x, c) \
(-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \
: (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \
: (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \
: (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \
: _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c))))
#else
#define _mm_roti_epi64(r, c) _mm_xor_si128(_mm_srli_epi64( (r), -(c) ),_mm_slli_epi64( (r), 64-(-(c)) ))
#endif
#else
/* ... */
#endif
#define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
\
row4l = _mm_xor_si128(row4l, row1l); \
row4h = _mm_xor_si128(row4h, row1h); \
\
row4l = _mm_roti_epi64(row4l, -32); \
row4h = _mm_roti_epi64(row4h, -32); \
\
row3l = _mm_add_epi64(row3l, row4l); \
row3h = _mm_add_epi64(row3h, row4h); \
\
row2l = _mm_xor_si128(row2l, row3l); \
row2h = _mm_xor_si128(row2h, row3h); \
\
row2l = _mm_roti_epi64(row2l, -24); \
row2h = _mm_roti_epi64(row2h, -24); \
#define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
\
row4l = _mm_xor_si128(row4l, row1l); \
row4h = _mm_xor_si128(row4h, row1h); \
\
row4l = _mm_roti_epi64(row4l, -16); \
row4h = _mm_roti_epi64(row4h, -16); \
\
row3l = _mm_add_epi64(row3l, row4l); \
row3h = _mm_add_epi64(row3h, row4h); \
\
row2l = _mm_xor_si128(row2l, row3l); \
row2h = _mm_xor_si128(row2h, row3h); \
\
row2l = _mm_roti_epi64(row2l, -63); \
row2h = _mm_roti_epi64(row2h, -63); \
#if defined(HAVE_SSSE3)
#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
t0 = _mm_alignr_epi8(row2h, row2l, 8); \
t1 = _mm_alignr_epi8(row2l, row2h, 8); \
row2l = t0; \
row2h = t1; \
\
t0 = row3l; \
row3l = row3h; \
row3h = t0; \
\
t0 = _mm_alignr_epi8(row4h, row4l, 8); \
t1 = _mm_alignr_epi8(row4l, row4h, 8); \
row4l = t1; \
row4h = t0;
#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
t0 = _mm_alignr_epi8(row2l, row2h, 8); \
t1 = _mm_alignr_epi8(row2h, row2l, 8); \
row2l = t0; \
row2h = t1; \
\
t0 = row3l; \
row3l = row3h; \
row3h = t0; \
\
t0 = _mm_alignr_epi8(row4l, row4h, 8); \
t1 = _mm_alignr_epi8(row4h, row4l, 8); \
row4l = t1; \
row4h = t0;
#else
#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
t0 = row4l;\
t1 = row2l;\
row4l = row3l;\
row3l = row3h;\
row3h = row4l;\
row4l = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t0, t0)); \
row4h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row4h, row4h)); \
row2l = _mm_unpackhi_epi64(row2l, _mm_unpacklo_epi64(row2h, row2h)); \
row2h = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(t1, t1))
#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
t0 = row3l;\
row3l = row3h;\
row3h = t0;\
t0 = row2l;\
t1 = row4l;\
row2l = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(row2l, row2l)); \
row2h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row2h, row2h)); \
row4l = _mm_unpackhi_epi64(row4l, _mm_unpacklo_epi64(row4h, row4h)); \
row4h = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t1, t1))
#endif
#if defined(HAVE_SSE4_1)
#include "blake2b-load-sse41.h"
#else
#include "blake2b-load-sse2.h"
#endif
#define ROUND(r) \
LOAD_MSG_ ##r ##_1(b0, b1); \
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
LOAD_MSG_ ##r ##_2(b0, b1); \
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
LOAD_MSG_ ##r ##_3(b0, b1); \
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
LOAD_MSG_ ##r ##_4(b0, b1); \
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
#endif

443
sys/contrib/libb2/blake2b.c Normal file
View File

@ -0,0 +1,443 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "blake2.h"
#include "blake2-impl.h"
#include "blake2-config.h"
#if defined(_MSC_VER)
#include <intrin.h>
#endif
#if defined(HAVE_SSE2)
#include <emmintrin.h>
// MSVC only defines _mm_set_epi64x for x86_64...
#if defined(_MSC_VER) && !defined(_M_X64)
static inline __m128i _mm_set_epi64x( const uint64_t u1, const uint64_t u0 )
{
return _mm_set_epi32( u1 >> 32, u1, u0 >> 32, u0 );
}
#endif
#endif
#if defined(HAVE_SSSE3)
#include <tmmintrin.h>
#endif
#if defined(HAVE_SSE4_1)
#include <smmintrin.h>
#endif
#if defined(HAVE_AVX)
#include <immintrin.h>
#endif
#if defined(HAVE_XOP) && !defined(_MSC_VER)
#include <x86intrin.h>
#endif
#include "blake2b-round.h"
static const uint64_t blake2b_IV[8] =
{
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
};
static const uint8_t blake2b_sigma[12][16] =
{
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
};
/* Some helper functions, not necessarily useful */
static inline int blake2b_set_lastnode( blake2b_state *S )
{
S->f[1] = ~0ULL;
return 0;
}
static inline int blake2b_clear_lastnode( blake2b_state *S )
{
S->f[1] = 0ULL;
return 0;
}
static inline int blake2b_set_lastblock( blake2b_state *S )
{
if( S->last_node ) blake2b_set_lastnode( S );
S->f[0] = ~0ULL;
return 0;
}
static inline int blake2b_clear_lastblock( blake2b_state *S )
{
if( S->last_node ) blake2b_clear_lastnode( S );
S->f[0] = 0ULL;
return 0;
}
static inline int blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
{
#if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__))
// ADD/ADC chain
__uint128_t t = ( ( __uint128_t )S->t[1] << 64 ) | S->t[0];
t += inc;
S->t[0] = ( uint64_t )( t >> 0 );
S->t[1] = ( uint64_t )( t >> 64 );
#else
S->t[0] += inc;
S->t[1] += ( S->t[0] < inc );
#endif
return 0;
}
// Parameter-related functions
static inline int blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length )
{
P->digest_length = digest_length;
return 0;
}
static inline int blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout )
{
P->fanout = fanout;
return 0;
}
static inline int blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth )
{
P->depth = depth;
return 0;
}
static inline int blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length )
{
P->leaf_length = leaf_length;
return 0;
}
static inline int blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset )
{
P->node_offset = node_offset;
return 0;
}
static inline int blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth )
{
P->node_depth = node_depth;
return 0;
}
static inline int blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length )
{
P->inner_length = inner_length;
return 0;
}
static inline int blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] )
{
memcpy( P->salt, salt, BLAKE2B_SALTBYTES );
return 0;
}
static inline int blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] )
{
memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES );
return 0;
}
static inline int blake2b_init0( blake2b_state *S )
{
memset( S, 0, sizeof( blake2b_state ) );
for( int i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i];
return 0;
}
#define blake2b_init BLAKE2_IMPL_NAME(blake2b_init)
#define blake2b_init_param BLAKE2_IMPL_NAME(blake2b_init_param)
#define blake2b_init_key BLAKE2_IMPL_NAME(blake2b_init_key)
#define blake2b_update BLAKE2_IMPL_NAME(blake2b_update)
#define blake2b_final BLAKE2_IMPL_NAME(blake2b_final)
#define blake2b BLAKE2_IMPL_NAME(blake2b)
#if defined(__cplusplus)
extern "C" {
#endif
int blake2b_init( blake2b_state *S, size_t outlen );
int blake2b_init_param( blake2b_state *S, const blake2b_param *P );
int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen );
int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen );
int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
#if defined(__cplusplus)
}
#endif
/* init xors IV with input parameter block */
int blake2b_init_param( blake2b_state *S, const blake2b_param *P )
{
uint8_t *p, *h, *v;
//blake2b_init0( S );
v = ( uint8_t * )( blake2b_IV );
h = ( uint8_t * )( S->h );
p = ( uint8_t * )( P );
/* IV XOR ParamBlock */
memset( S, 0, sizeof( blake2b_state ) );
for( int i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i];
S->outlen = P->digest_length;
return 0;
}
/* Some sort of default parameter block initialization, for sequential blake2b */
int blake2b_init( blake2b_state *S, size_t outlen )
{
if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
const blake2b_param P =
{
( uint8_t ) outlen,
0,
1,
1,
0,
0,
0,
0,
{0},
{0},
{0}
};
return blake2b_init_param( S, &P );
}
int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen )
{
if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
if ( ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1;
const blake2b_param P =
{
( uint8_t ) outlen,
( uint8_t ) keylen,
1,
1,
0,
0,
0,
0,
{0},
{0},
{0}
};
if( blake2b_init_param( S, &P ) < 0 )
return 0;
{
uint8_t block[BLAKE2B_BLOCKBYTES];
memset( block, 0, BLAKE2B_BLOCKBYTES );
memcpy( block, key, keylen );
blake2b_update( S, block, BLAKE2B_BLOCKBYTES );
secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
}
return 0;
}
static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
{
__m128i row1l, row1h;
__m128i row2l, row2h;
__m128i row3l, row3h;
__m128i row4l, row4h;
__m128i b0, b1;
__m128i t0, t1;
#if defined(HAVE_SSSE3) && !defined(HAVE_XOP)
const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 );
const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 );
#endif
#if defined(HAVE_SSE4_1)
const __m128i m0 = LOADU( block + 00 );
const __m128i m1 = LOADU( block + 16 );
const __m128i m2 = LOADU( block + 32 );
const __m128i m3 = LOADU( block + 48 );
const __m128i m4 = LOADU( block + 64 );
const __m128i m5 = LOADU( block + 80 );
const __m128i m6 = LOADU( block + 96 );
const __m128i m7 = LOADU( block + 112 );
#else
const uint64_t m0 = ( ( uint64_t * )block )[ 0];
const uint64_t m1 = ( ( uint64_t * )block )[ 1];
const uint64_t m2 = ( ( uint64_t * )block )[ 2];
const uint64_t m3 = ( ( uint64_t * )block )[ 3];
const uint64_t m4 = ( ( uint64_t * )block )[ 4];
const uint64_t m5 = ( ( uint64_t * )block )[ 5];
const uint64_t m6 = ( ( uint64_t * )block )[ 6];
const uint64_t m7 = ( ( uint64_t * )block )[ 7];
const uint64_t m8 = ( ( uint64_t * )block )[ 8];
const uint64_t m9 = ( ( uint64_t * )block )[ 9];
const uint64_t m10 = ( ( uint64_t * )block )[10];
const uint64_t m11 = ( ( uint64_t * )block )[11];
const uint64_t m12 = ( ( uint64_t * )block )[12];
const uint64_t m13 = ( ( uint64_t * )block )[13];
const uint64_t m14 = ( ( uint64_t * )block )[14];
const uint64_t m15 = ( ( uint64_t * )block )[15];
#endif
row1l = LOADU( &S->h[0] );
row1h = LOADU( &S->h[2] );
row2l = LOADU( &S->h[4] );
row2h = LOADU( &S->h[6] );
row3l = LOADU( &blake2b_IV[0] );
row3h = LOADU( &blake2b_IV[2] );
row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) );
row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) );
ROUND( 0 );
ROUND( 1 );
ROUND( 2 );
ROUND( 3 );
ROUND( 4 );
ROUND( 5 );
ROUND( 6 );
ROUND( 7 );
ROUND( 8 );
ROUND( 9 );
ROUND( 10 );
ROUND( 11 );
row1l = _mm_xor_si128( row3l, row1l );
row1h = _mm_xor_si128( row3h, row1h );
STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) );
STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) );
row2l = _mm_xor_si128( row4l, row2l );
row2h = _mm_xor_si128( row4h, row2h );
STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) );
STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) );
return 0;
}
int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen )
{
while( inlen > 0 )
{
uint32_t left = S->buflen;
uint32_t fill = 2 * BLAKE2B_BLOCKBYTES - left;
if( inlen > fill )
{
memcpy( S->buf + left, in, fill ); // Fill buffer
S->buflen += fill;
blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
blake2b_compress( S, S->buf ); // Compress
memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); // Shift buffer left
S->buflen -= BLAKE2B_BLOCKBYTES;
in += fill;
inlen -= fill;
}
else // inlen <= fill
{
memcpy( S->buf + left, in, inlen );
S->buflen += ( uint32_t ) inlen; // Be lazy, do not compress
in += inlen;
inlen -= inlen;
}
}
return 0;
}
int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen )
{
if(S->outlen != outlen) return -1;
if( S->buflen > BLAKE2B_BLOCKBYTES )
{
blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
blake2b_compress( S, S->buf );
S->buflen -= BLAKE2B_BLOCKBYTES;
memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen );
}
blake2b_increment_counter( S, S->buflen );
blake2b_set_lastblock( S );
memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */
blake2b_compress( S, S->buf );
memcpy( out, &S->h[0], outlen );
return 0;
}
int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
{
blake2b_state S[1];
/* Verify parameters */
if ( NULL == in && inlen > 0 ) return -1;
if ( NULL == out ) return -1;
if( NULL == key && keylen > 0 ) return -1;
if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
if( keylen > BLAKE2B_KEYBYTES ) return -1;
if( keylen )
{
if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1;
}
else
{
if( blake2b_init( S, outlen ) < 0 ) return -1;
}
if( blake2b_update( S, ( uint8_t * )in, inlen ) < 0) return -1;
return blake2b_final( S, out, outlen );
}
#if defined(SUPERCOP)
int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
{
return blake2b( out, in, NULL, BLAKE2B_OUTBYTES, inlen, 0 );
}
#endif

View File

@ -0,0 +1,274 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#if defined(_OPENMP)
#include <omp.h>
#endif
#include "blake2.h"
#include "blake2-impl.h"
#define PARALLELISM_DEGREE 4
static int blake2bp_init_leaf( blake2b_state *S, uint8_t outlen, uint8_t keylen, uint64_t offset )
{
blake2b_param P[1];
P->digest_length = outlen;
P->key_length = keylen;
P->fanout = PARALLELISM_DEGREE;
P->depth = 2;
store32(&P->leaf_length, 0);
store64(&P->node_offset, offset);
P->node_depth = 0;
P->inner_length = BLAKE2B_OUTBYTES;
memset( P->reserved, 0, sizeof( P->reserved ) );
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
blake2b_init_param( S, P );
S->outlen = P->inner_length;
return 0;
}
static int blake2bp_init_root( blake2b_state *S, uint8_t outlen, uint8_t keylen )
{
blake2b_param P[1];
P->digest_length = outlen;
P->key_length = keylen;
P->fanout = PARALLELISM_DEGREE;
P->depth = 2;
store32(&P->leaf_length, 0);
store64(&P->node_offset, 0);
P->node_depth = 1;
P->inner_length = BLAKE2B_OUTBYTES;
memset( P->reserved, 0, sizeof( P->reserved ) );
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
blake2b_init_param( S, P );
S->outlen = P->digest_length;
return 0;
}
int blake2bp_init( blake2bp_state *S, size_t outlen )
{
if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
memset( S->buf, 0, sizeof( S->buf ) );
S->buflen = 0;
if( blake2bp_init_root( S->R, ( uint8_t ) outlen, 0 ) < 0 )
return -1;
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2bp_init_leaf( S->S[i], ( uint8_t ) outlen, 0, i ) < 0 ) return -1;
S->R->last_node = 1;
S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
S->outlen = ( uint8_t ) outlen;
return 0;
}
int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen )
{
if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
if( !key || !keylen || keylen > BLAKE2B_KEYBYTES ) return -1;
memset( S->buf, 0, sizeof( S->buf ) );
S->buflen = 0;
if( blake2bp_init_root( S->R, ( uint8_t ) outlen, ( uint8_t ) keylen ) < 0 )
return -1;
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2bp_init_leaf( S->S[i], ( uint8_t ) outlen, ( uint8_t ) keylen, i ) < 0 )
return -1;
S->R->last_node = 1;
S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
S->outlen = ( uint8_t ) outlen;
{
uint8_t block[BLAKE2B_BLOCKBYTES];
memset( block, 0, BLAKE2B_BLOCKBYTES );
memcpy( block, key, keylen );
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
blake2b_update( S->S[i], block, BLAKE2B_BLOCKBYTES );
secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
}
return 0;
}
int blake2bp_update( blake2bp_state *S, const uint8_t *in, size_t inlen )
{
size_t left = S->buflen;
size_t fill = sizeof( S->buf ) - left;
if( left && inlen >= fill )
{
memcpy( S->buf + left, in, fill );
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
blake2b_update( S->S[i], S->buf + i * BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES );
in += fill;
inlen -= fill;
left = 0;
}
#if defined(_OPENMP)
omp_set_num_threads(PARALLELISM_DEGREE);
#pragma omp parallel shared(S)
#else
for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ )
#endif
{
#if defined(_OPENMP)
size_t id__ = ( size_t ) omp_get_thread_num();
#endif
size_t inlen__ = inlen;
const uint8_t *in__ = ( const uint8_t * )in;
in__ += id__ * BLAKE2B_BLOCKBYTES;
while( inlen__ >= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES )
{
blake2b_update( S->S[id__], in__, BLAKE2B_BLOCKBYTES );
in__ += PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
inlen__ -= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
}
}
in += inlen - inlen % ( PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES );
inlen %= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
if( inlen > 0 )
memcpy( S->buf + left, in, inlen );
S->buflen = ( uint32_t ) left + ( uint32_t ) inlen;
return 0;
}
int blake2bp_final( blake2bp_state *S, uint8_t *out, size_t outlen )
{
uint8_t hash[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES];
if(S->outlen != outlen) return -1;
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
{
if( S->buflen > i * BLAKE2B_BLOCKBYTES )
{
size_t left = S->buflen - i * BLAKE2B_BLOCKBYTES;
if( left > BLAKE2B_BLOCKBYTES ) left = BLAKE2B_BLOCKBYTES;
blake2b_update( S->S[i], S->buf + i * BLAKE2B_BLOCKBYTES, left );
}
blake2b_final( S->S[i], hash[i], BLAKE2B_OUTBYTES );
}
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
blake2b_update( S->R, hash[i], BLAKE2B_OUTBYTES );
return blake2b_final( S->R, out, outlen );
}
int blake2bp( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
{
uint8_t hash[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES];
blake2b_state S[PARALLELISM_DEGREE][1];
blake2b_state FS[1];
/* Verify parameters */
if ( NULL == in && inlen > 0 ) return -1;
if ( NULL == out ) return -1;
if ( NULL == key && keylen > 0) return -1;
if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
if( keylen > BLAKE2B_KEYBYTES ) return -1;
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2bp_init_leaf( S[i], ( uint8_t ) outlen, ( uint8_t ) keylen, i ) < 0 )
return -1;
S[PARALLELISM_DEGREE - 1]->last_node = 1; // mark last node
if( keylen > 0 )
{
uint8_t block[BLAKE2B_BLOCKBYTES];
memset( block, 0, BLAKE2B_BLOCKBYTES );
memcpy( block, key, keylen );
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
blake2b_update( S[i], block, BLAKE2B_BLOCKBYTES );
secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
}
#if defined(_OPENMP)
omp_set_num_threads(PARALLELISM_DEGREE);
#pragma omp parallel shared(S,hash)
#else
for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ )
#endif
{
#if defined(_OPENMP)
size_t id__ = ( size_t ) omp_get_thread_num();
#endif
size_t inlen__ = inlen;
const uint8_t *in__ = ( const uint8_t * )in;
in__ += id__ * BLAKE2B_BLOCKBYTES;
while( inlen__ >= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES )
{
blake2b_update( S[id__], in__, BLAKE2B_BLOCKBYTES );
in__ += PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
inlen__ -= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
}
if( inlen__ > id__ * BLAKE2B_BLOCKBYTES )
{
const size_t left = inlen__ - id__ * BLAKE2B_BLOCKBYTES;
const size_t len = left <= BLAKE2B_BLOCKBYTES ? left : BLAKE2B_BLOCKBYTES;
blake2b_update( S[id__], in__, len );
}
blake2b_final( S[id__], hash[id__], BLAKE2B_OUTBYTES );
}
if( blake2bp_init_root( FS, ( uint8_t ) outlen, ( uint8_t ) keylen ) < 0 )
return -1;
FS->last_node = 1; // Mark as last node
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
blake2b_update( FS, hash[i], BLAKE2B_OUTBYTES );
return blake2b_final( FS, out, outlen );
}

View File

@ -0,0 +1,59 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#pragma once
#ifndef __BLAKE2S_LOAD_SSE2_H__
#define __BLAKE2S_LOAD_SSE2_H__
#define LOAD_MSG_0_1(buf) buf = _mm_set_epi32(m6,m4,m2,m0)
#define LOAD_MSG_0_2(buf) buf = _mm_set_epi32(m7,m5,m3,m1)
#define LOAD_MSG_0_3(buf) buf = _mm_set_epi32(m14,m12,m10,m8)
#define LOAD_MSG_0_4(buf) buf = _mm_set_epi32(m15,m13,m11,m9)
#define LOAD_MSG_1_1(buf) buf = _mm_set_epi32(m13,m9,m4,m14)
#define LOAD_MSG_1_2(buf) buf = _mm_set_epi32(m6,m15,m8,m10)
#define LOAD_MSG_1_3(buf) buf = _mm_set_epi32(m5,m11,m0,m1)
#define LOAD_MSG_1_4(buf) buf = _mm_set_epi32(m3,m7,m2,m12)
#define LOAD_MSG_2_1(buf) buf = _mm_set_epi32(m15,m5,m12,m11)
#define LOAD_MSG_2_2(buf) buf = _mm_set_epi32(m13,m2,m0,m8)
#define LOAD_MSG_2_3(buf) buf = _mm_set_epi32(m9,m7,m3,m10)
#define LOAD_MSG_2_4(buf) buf = _mm_set_epi32(m4,m1,m6,m14)
#define LOAD_MSG_3_1(buf) buf = _mm_set_epi32(m11,m13,m3,m7)
#define LOAD_MSG_3_2(buf) buf = _mm_set_epi32(m14,m12,m1,m9)
#define LOAD_MSG_3_3(buf) buf = _mm_set_epi32(m15,m4,m5,m2)
#define LOAD_MSG_3_4(buf) buf = _mm_set_epi32(m8,m0,m10,m6)
#define LOAD_MSG_4_1(buf) buf = _mm_set_epi32(m10,m2,m5,m9)
#define LOAD_MSG_4_2(buf) buf = _mm_set_epi32(m15,m4,m7,m0)
#define LOAD_MSG_4_3(buf) buf = _mm_set_epi32(m3,m6,m11,m14)
#define LOAD_MSG_4_4(buf) buf = _mm_set_epi32(m13,m8,m12,m1)
#define LOAD_MSG_5_1(buf) buf = _mm_set_epi32(m8,m0,m6,m2)
#define LOAD_MSG_5_2(buf) buf = _mm_set_epi32(m3,m11,m10,m12)
#define LOAD_MSG_5_3(buf) buf = _mm_set_epi32(m1,m15,m7,m4)
#define LOAD_MSG_5_4(buf) buf = _mm_set_epi32(m9,m14,m5,m13)
#define LOAD_MSG_6_1(buf) buf = _mm_set_epi32(m4,m14,m1,m12)
#define LOAD_MSG_6_2(buf) buf = _mm_set_epi32(m10,m13,m15,m5)
#define LOAD_MSG_6_3(buf) buf = _mm_set_epi32(m8,m9,m6,m0)
#define LOAD_MSG_6_4(buf) buf = _mm_set_epi32(m11,m2,m3,m7)
#define LOAD_MSG_7_1(buf) buf = _mm_set_epi32(m3,m12,m7,m13)
#define LOAD_MSG_7_2(buf) buf = _mm_set_epi32(m9,m1,m14,m11)
#define LOAD_MSG_7_3(buf) buf = _mm_set_epi32(m2,m8,m15,m5)
#define LOAD_MSG_7_4(buf) buf = _mm_set_epi32(m10,m6,m4,m0)
#define LOAD_MSG_8_1(buf) buf = _mm_set_epi32(m0,m11,m14,m6)
#define LOAD_MSG_8_2(buf) buf = _mm_set_epi32(m8,m3,m9,m15)
#define LOAD_MSG_8_3(buf) buf = _mm_set_epi32(m10,m1,m13,m12)
#define LOAD_MSG_8_4(buf) buf = _mm_set_epi32(m5,m4,m7,m2)
#define LOAD_MSG_9_1(buf) buf = _mm_set_epi32(m1,m7,m8,m10)
#define LOAD_MSG_9_2(buf) buf = _mm_set_epi32(m5,m6,m4,m2)
#define LOAD_MSG_9_3(buf) buf = _mm_set_epi32(m13,m3,m9,m15)
#define LOAD_MSG_9_4(buf) buf = _mm_set_epi32(m0,m12,m14,m11)
#endif

View File

@ -0,0 +1,229 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#pragma once
#ifndef __BLAKE2S_LOAD_SSE41_H__
#define __BLAKE2S_LOAD_SSE41_H__
#define LOAD_MSG_0_1(buf) \
buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(2,0,2,0)));
#define LOAD_MSG_0_2(buf) \
buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(3,1,3,1)));
#define LOAD_MSG_0_3(buf) \
buf = TOI(_mm_shuffle_ps(TOF(m2), TOF(m3), _MM_SHUFFLE(2,0,2,0)));
#define LOAD_MSG_0_4(buf) \
buf = TOI(_mm_shuffle_ps(TOF(m2), TOF(m3), _MM_SHUFFLE(3,1,3,1)));
#define LOAD_MSG_1_1(buf) \
t0 = _mm_blend_epi16(m1, m2, 0x0C); \
t1 = _mm_slli_si128(m3, 4); \
t2 = _mm_blend_epi16(t0, t1, 0xF0); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,0,3));
#define LOAD_MSG_1_2(buf) \
t0 = _mm_shuffle_epi32(m2,_MM_SHUFFLE(0,0,2,0)); \
t1 = _mm_blend_epi16(m1,m3,0xC0); \
t2 = _mm_blend_epi16(t0, t1, 0xF0); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1));
#define LOAD_MSG_1_3(buf) \
t0 = _mm_slli_si128(m1, 4); \
t1 = _mm_blend_epi16(m2, t0, 0x30); \
t2 = _mm_blend_epi16(m0, t1, 0xF0); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1));
#define LOAD_MSG_1_4(buf) \
t0 = _mm_unpackhi_epi32(m0,m1); \
t1 = _mm_slli_si128(m3, 4); \
t2 = _mm_blend_epi16(t0, t1, 0x0C); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1));
#define LOAD_MSG_2_1(buf) \
t0 = _mm_unpackhi_epi32(m2,m3); \
t1 = _mm_blend_epi16(m3,m1,0x0C); \
t2 = _mm_blend_epi16(t0, t1, 0x0F); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2));
#define LOAD_MSG_2_2(buf) \
t0 = _mm_unpacklo_epi32(m2,m0); \
t1 = _mm_blend_epi16(t0, m0, 0xF0); \
t2 = _mm_slli_si128(m3, 8); \
buf = _mm_blend_epi16(t1, t2, 0xC0);
#define LOAD_MSG_2_3(buf) \
t0 = _mm_blend_epi16(m0, m2, 0x3C); \
t1 = _mm_srli_si128(m1, 12); \
t2 = _mm_blend_epi16(t0,t1,0x03); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,0,3,2));
#define LOAD_MSG_2_4(buf) \
t0 = _mm_slli_si128(m3, 4); \
t1 = _mm_blend_epi16(m0, m1, 0x33); \
t2 = _mm_blend_epi16(t1, t0, 0xC0); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(0,1,2,3));
#define LOAD_MSG_3_1(buf) \
t0 = _mm_unpackhi_epi32(m0,m1); \
t1 = _mm_unpackhi_epi32(t0, m2); \
t2 = _mm_blend_epi16(t1, m3, 0x0C); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2));
#define LOAD_MSG_3_2(buf) \
t0 = _mm_slli_si128(m2, 8); \
t1 = _mm_blend_epi16(m3,m0,0x0C); \
t2 = _mm_blend_epi16(t1, t0, 0xC0); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,1,3));
#define LOAD_MSG_3_3(buf) \
t0 = _mm_blend_epi16(m0,m1,0x0F); \
t1 = _mm_blend_epi16(t0, m3, 0xC0); \
buf = _mm_shuffle_epi32(t1, _MM_SHUFFLE(3,0,1,2));
#define LOAD_MSG_3_4(buf) \
t0 = _mm_unpacklo_epi32(m0,m2); \
t1 = _mm_unpackhi_epi32(m1,m2); \
buf = _mm_unpacklo_epi64(t1,t0);
#define LOAD_MSG_4_1(buf) \
t0 = _mm_unpacklo_epi64(m1,m2); \
t1 = _mm_unpackhi_epi64(m0,m2); \
t2 = _mm_blend_epi16(t0,t1,0x33); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,1,3));
#define LOAD_MSG_4_2(buf) \
t0 = _mm_unpackhi_epi64(m1,m3); \
t1 = _mm_unpacklo_epi64(m0,m1); \
buf = _mm_blend_epi16(t0,t1,0x33);
#define LOAD_MSG_4_3(buf) \
t0 = _mm_unpackhi_epi64(m3,m1); \
t1 = _mm_unpackhi_epi64(m2,m0); \
buf = _mm_blend_epi16(t1,t0,0x33);
#define LOAD_MSG_4_4(buf) \
t0 = _mm_blend_epi16(m0,m2,0x03); \
t1 = _mm_slli_si128(t0, 8); \
t2 = _mm_blend_epi16(t1,m3,0x0F); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,2,0,3));
#define LOAD_MSG_5_1(buf) \
t0 = _mm_unpackhi_epi32(m0,m1); \
t1 = _mm_unpacklo_epi32(m0,m2); \
buf = _mm_unpacklo_epi64(t0,t1);
#define LOAD_MSG_5_2(buf) \
t0 = _mm_srli_si128(m2, 4); \
t1 = _mm_blend_epi16(m0,m3,0x03); \
buf = _mm_blend_epi16(t1,t0,0x3C);
#define LOAD_MSG_5_3(buf) \
t0 = _mm_blend_epi16(m1,m0,0x0C); \
t1 = _mm_srli_si128(m3, 4); \
t2 = _mm_blend_epi16(t0,t1,0x30); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,2,3,0));
#define LOAD_MSG_5_4(buf) \
t0 = _mm_unpacklo_epi64(m1,m2); \
t1= _mm_shuffle_epi32(m3, _MM_SHUFFLE(0,2,0,1)); \
buf = _mm_blend_epi16(t0,t1,0x33);
#define LOAD_MSG_6_1(buf) \
t0 = _mm_slli_si128(m1, 12); \
t1 = _mm_blend_epi16(m0,m3,0x33); \
buf = _mm_blend_epi16(t1,t0,0xC0);
#define LOAD_MSG_6_2(buf) \
t0 = _mm_blend_epi16(m3,m2,0x30); \
t1 = _mm_srli_si128(m1, 4); \
t2 = _mm_blend_epi16(t0,t1,0x03); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,3,0));
#define LOAD_MSG_6_3(buf) \
t0 = _mm_unpacklo_epi64(m0,m2); \
t1 = _mm_srli_si128(m1, 4); \
buf = _mm_shuffle_epi32(_mm_blend_epi16(t0,t1,0x0C), _MM_SHUFFLE(2,3,1,0));
#define LOAD_MSG_6_4(buf) \
t0 = _mm_unpackhi_epi32(m1,m2); \
t1 = _mm_unpackhi_epi64(m0,t0); \
buf = _mm_shuffle_epi32(t1, _MM_SHUFFLE(3,0,1,2));
#define LOAD_MSG_7_1(buf) \
t0 = _mm_unpackhi_epi32(m0,m1); \
t1 = _mm_blend_epi16(t0,m3,0x0F); \
buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(2,0,3,1));
#define LOAD_MSG_7_2(buf) \
t0 = _mm_blend_epi16(m2,m3,0x30); \
t1 = _mm_srli_si128(m0,4); \
t2 = _mm_blend_epi16(t0,t1,0x03); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,0,2,3));
#define LOAD_MSG_7_3(buf) \
t0 = _mm_unpackhi_epi64(m0,m3); \
t1 = _mm_unpacklo_epi64(m1,m2); \
t2 = _mm_blend_epi16(t0,t1,0x3C); \
buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(0,2,3,1));
#define LOAD_MSG_7_4(buf) \
t0 = _mm_unpacklo_epi32(m0,m1); \
t1 = _mm_unpackhi_epi32(m1,m2); \
buf = _mm_unpacklo_epi64(t0,t1);
#define LOAD_MSG_8_1(buf) \
t0 = _mm_unpackhi_epi32(m1,m3); \
t1 = _mm_unpacklo_epi64(t0,m0); \
t2 = _mm_blend_epi16(t1,m2,0xC0); \
buf = _mm_shufflehi_epi16(t2,_MM_SHUFFLE(1,0,3,2));
#define LOAD_MSG_8_2(buf) \
t0 = _mm_unpackhi_epi32(m0,m3); \
t1 = _mm_blend_epi16(m2,t0,0xF0); \
buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(0,2,1,3));
#define LOAD_MSG_8_3(buf) \
t0 = _mm_blend_epi16(m2,m0,0x0C); \
t1 = _mm_slli_si128(t0,4); \
buf = _mm_blend_epi16(t1,m3,0x0F);
#define LOAD_MSG_8_4(buf) \
t0 = _mm_blend_epi16(m1,m0,0x30); \
buf = _mm_shuffle_epi32(t0,_MM_SHUFFLE(1,0,3,2));
#define LOAD_MSG_9_1(buf) \
t0 = _mm_blend_epi16(m0,m2,0x03); \
t1 = _mm_blend_epi16(m1,m2,0x30); \
t2 = _mm_blend_epi16(t1,t0,0x0F); \
buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(1,3,0,2));
#define LOAD_MSG_9_2(buf) \
t0 = _mm_slli_si128(m0,4); \
t1 = _mm_blend_epi16(m1,t0,0xC0); \
buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(1,2,0,3));
#define LOAD_MSG_9_3(buf) \
t0 = _mm_unpackhi_epi32(m0,m3); \
t1 = _mm_unpacklo_epi32(m2,m3); \
t2 = _mm_unpackhi_epi64(t0,t1); \
buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(3,0,2,1));
#define LOAD_MSG_9_4(buf) \
t0 = _mm_blend_epi16(m3,m2,0xC0); \
t1 = _mm_unpacklo_epi32(m0,m3); \
t2 = _mm_blend_epi16(t0,t1,0x0F); \
buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(0,1,2,3));
#endif

View File

@ -0,0 +1,189 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#pragma once
#ifndef __BLAKE2S_LOAD_XOP_H__
#define __BLAKE2S_LOAD_XOP_H__
#define TOB(x) ((x)*4*0x01010101 + 0x03020100) // ..or not TOB
/* Basic VPPERM emulation, for testing purposes */
/*static __m128i _mm_perm_epi8(const __m128i src1, const __m128i src2, const __m128i sel)
{
const __m128i sixteen = _mm_set1_epi8(16);
const __m128i t0 = _mm_shuffle_epi8(src1, sel);
const __m128i s1 = _mm_shuffle_epi8(src2, _mm_sub_epi8(sel, sixteen));
const __m128i mask = _mm_or_si128(_mm_cmpeq_epi8(sel, sixteen),
_mm_cmpgt_epi8(sel, sixteen)); // (>=16) = 0xff : 00
return _mm_blendv_epi8(t0, s1, mask);
}*/
#define LOAD_MSG_0_1(buf) \
buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(6),TOB(4),TOB(2),TOB(0)) );
#define LOAD_MSG_0_2(buf) \
buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(7),TOB(5),TOB(3),TOB(1)) );
#define LOAD_MSG_0_3(buf) \
buf = _mm_perm_epi8(m2, m3, _mm_set_epi32(TOB(6),TOB(4),TOB(2),TOB(0)) );
#define LOAD_MSG_0_4(buf) \
buf = _mm_perm_epi8(m2, m3, _mm_set_epi32(TOB(7),TOB(5),TOB(3),TOB(1)) );
#define LOAD_MSG_1_1(buf) \
t0 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(0),TOB(5),TOB(0),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(6)) );
#define LOAD_MSG_1_2(buf) \
t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(2),TOB(0),TOB(4),TOB(6)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) );
#define LOAD_MSG_1_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(0),TOB(0),TOB(1)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) );
#define LOAD_MSG_1_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(7),TOB(2),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(4)) );
#define LOAD_MSG_2_1(buf) \
t0 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(0),TOB(1),TOB(0),TOB(7)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(4),TOB(0)) );
#define LOAD_MSG_2_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(2),TOB(0),TOB(4)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(0)) );
#define LOAD_MSG_2_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(7),TOB(3),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(6)) );
#define LOAD_MSG_2_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(4),TOB(1),TOB(6),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(6)) );
#define LOAD_MSG_3_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(3),TOB(7)) ); \
t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(5),TOB(1),TOB(0)) );
#define LOAD_MSG_3_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(0),TOB(1),TOB(5)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(6),TOB(4),TOB(1),TOB(0)) );
#define LOAD_MSG_3_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(4),TOB(5),TOB(2)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) );
#define LOAD_MSG_3_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(6)) ); \
buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(4),TOB(2),TOB(6),TOB(0)) );
#define LOAD_MSG_4_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(2),TOB(5),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(6),TOB(2),TOB(1),TOB(5)) );
#define LOAD_MSG_4_2(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(4),TOB(7),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) );
#define LOAD_MSG_4_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(6),TOB(0),TOB(0)) ); \
t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(2),TOB(7),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(6)) );
#define LOAD_MSG_4_4(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(4),TOB(0),TOB(1)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(4),TOB(0)) );
#define LOAD_MSG_5_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(6),TOB(2)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(4),TOB(2),TOB(1),TOB(0)) );
#define LOAD_MSG_5_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(6),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(4)) );
#define LOAD_MSG_5_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(1),TOB(0),TOB(7),TOB(4)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) );
#define LOAD_MSG_5_4(buf) \
t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(5),TOB(0),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(6),TOB(1),TOB(5)) );
#define LOAD_MSG_6_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(4),TOB(0),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(6),TOB(1),TOB(4)) );
#define LOAD_MSG_6_2(buf) \
t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(6),TOB(0),TOB(0),TOB(1)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(5),TOB(7),TOB(0)) );
#define LOAD_MSG_6_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(6),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(4),TOB(5),TOB(1),TOB(0)) );
#define LOAD_MSG_6_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(2),TOB(3),TOB(7)) ); \
buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) );
#define LOAD_MSG_7_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(0),TOB(7),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(4),TOB(1),TOB(5)) );
#define LOAD_MSG_7_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(5),TOB(1),TOB(0),TOB(7)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(6),TOB(0)) );
#define LOAD_MSG_7_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(2),TOB(0),TOB(0),TOB(5)) ); \
t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(4),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(7),TOB(0)) );
#define LOAD_MSG_7_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(6),TOB(4),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(6),TOB(2),TOB(1),TOB(0)) );
#define LOAD_MSG_8_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(6)) ); \
t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(6),TOB(0)) );
#define LOAD_MSG_8_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(4),TOB(3),TOB(5),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(7)) );
#define LOAD_MSG_8_3(buf) \
t0 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(6),TOB(1),TOB(0),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(5),TOB(4)) ); \
#define LOAD_MSG_8_4(buf) \
buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(4),TOB(7),TOB(2)) );
#define LOAD_MSG_9_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(1),TOB(7),TOB(0),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(2),TOB(4),TOB(6)) );
#define LOAD_MSG_9_2(buf) \
buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(6),TOB(4),TOB(2)) );
#define LOAD_MSG_9_3(buf) \
t0 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(3),TOB(5),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(7)) );
#define LOAD_MSG_9_4(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(7)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(4),TOB(6),TOB(0)) );
#endif

View File

@ -0,0 +1,375 @@
/*
BLAKE2 reference source code package - reference C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "blake2.h"
#include "blake2-impl.h"
static const uint32_t blake2s_IV[8] =
{
0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
};
static const uint8_t blake2s_sigma[10][16] =
{
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
};
static inline int blake2s_set_lastnode( blake2s_state *S )
{
S->f[1] = ~0U;
return 0;
}
static inline int blake2s_clear_lastnode( blake2s_state *S )
{
S->f[1] = 0U;
return 0;
}
/* Some helper functions, not necessarily useful */
static inline int blake2s_set_lastblock( blake2s_state *S )
{
if( S->last_node ) blake2s_set_lastnode( S );
S->f[0] = ~0U;
return 0;
}
static inline int blake2s_clear_lastblock( blake2s_state *S )
{
if( S->last_node ) blake2s_clear_lastnode( S );
S->f[0] = 0U;
return 0;
}
static inline int blake2s_increment_counter( blake2s_state *S, const uint32_t inc )
{
S->t[0] += inc;
S->t[1] += ( S->t[0] < inc );
return 0;
}
// Parameter-related functions
static inline int blake2s_param_set_digest_length( blake2s_param *P, const uint8_t digest_length )
{
P->digest_length = digest_length;
return 0;
}
static inline int blake2s_param_set_fanout( blake2s_param *P, const uint8_t fanout )
{
P->fanout = fanout;
return 0;
}
static inline int blake2s_param_set_max_depth( blake2s_param *P, const uint8_t depth )
{
P->depth = depth;
return 0;
}
static inline int blake2s_param_set_leaf_length( blake2s_param *P, const uint32_t leaf_length )
{
store32( &P->leaf_length, leaf_length );
return 0;
}
static inline int blake2s_param_set_node_offset( blake2s_param *P, const uint64_t node_offset )
{
store48( P->node_offset, node_offset );
return 0;
}
static inline int blake2s_param_set_node_depth( blake2s_param *P, const uint8_t node_depth )
{
P->node_depth = node_depth;
return 0;
}
static inline int blake2s_param_set_inner_length( blake2s_param *P, const uint8_t inner_length )
{
P->inner_length = inner_length;
return 0;
}
static inline int blake2s_param_set_salt( blake2s_param *P, const uint8_t salt[BLAKE2S_SALTBYTES] )
{
memcpy( P->salt, salt, BLAKE2S_SALTBYTES );
return 0;
}
static inline int blake2s_param_set_personal( blake2s_param *P, const uint8_t personal[BLAKE2S_PERSONALBYTES] )
{
memcpy( P->personal, personal, BLAKE2S_PERSONALBYTES );
return 0;
}
static inline int blake2s_init0( blake2s_state *S )
{
memset( S, 0, sizeof( blake2s_state ) );
for( int i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i];
return 0;
}
#define blake2s_init BLAKE2_IMPL_NAME(blake2s_init)
#define blake2s_init_param BLAKE2_IMPL_NAME(blake2s_init_param)
#define blake2s_init_key BLAKE2_IMPL_NAME(blake2s_init_key)
#define blake2s_update BLAKE2_IMPL_NAME(blake2s_update)
#define blake2s_final BLAKE2_IMPL_NAME(blake2s_final)
#define blake2s BLAKE2_IMPL_NAME(blake2s)
#if defined(__cplusplus)
extern "C" {
#endif
int blake2s_init( blake2s_state *S, size_t outlen );
int blake2s_init_param( blake2s_state *S, const blake2s_param *P );
int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen );
int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen );
int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen );
int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
#if defined(__cplusplus)
}
#endif
/* init2 xors IV with input parameter block */
int blake2s_init_param( blake2s_state *S, const blake2s_param *P )
{
blake2s_init0( S );
uint32_t *p = ( uint32_t * )( P );
/* IV XOR ParamBlock */
for( size_t i = 0; i < 8; ++i )
S->h[i] ^= load32( &p[i] );
S->outlen = P->digest_length;
return 0;
}
// Sequential blake2s initialization
int blake2s_init( blake2s_state *S, size_t outlen )
{
blake2s_param P[1];
/* Move interval verification here? */
if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;
P->digest_length = ( uint8_t) outlen;
P->key_length = 0;
P->fanout = 1;
P->depth = 1;
store32( &P->leaf_length, 0 );
store48( &P->node_offset, 0 );
P->node_depth = 0;
P->inner_length = 0;
// memset(P->reserved, 0, sizeof(P->reserved) );
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
return blake2s_init_param( S, P );
}
int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen )
{
blake2s_param P[1];
if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;
if ( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1;
P->digest_length = ( uint8_t ) outlen;
P->key_length = ( uint8_t ) keylen;
P->fanout = 1;
P->depth = 1;
store32( &P->leaf_length, 0 );
store48( &P->node_offset, 0 );
P->node_depth = 0;
P->inner_length = 0;
// memset(P->reserved, 0, sizeof(P->reserved) );
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
if( blake2s_init_param( S, P ) < 0 ) return -1;
{
uint8_t block[BLAKE2S_BLOCKBYTES];
memset( block, 0, BLAKE2S_BLOCKBYTES );
memcpy( block, key, keylen );
blake2s_update( S, block, BLAKE2S_BLOCKBYTES );
secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
}
return 0;
}
static int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] )
{
uint32_t m[16];
uint32_t v[16];
for( size_t i = 0; i < 16; ++i )
m[i] = load32( block + i * sizeof( m[i] ) );
for( size_t i = 0; i < 8; ++i )
v[i] = S->h[i];
v[ 8] = blake2s_IV[0];
v[ 9] = blake2s_IV[1];
v[10] = blake2s_IV[2];
v[11] = blake2s_IV[3];
v[12] = S->t[0] ^ blake2s_IV[4];
v[13] = S->t[1] ^ blake2s_IV[5];
v[14] = S->f[0] ^ blake2s_IV[6];
v[15] = S->f[1] ^ blake2s_IV[7];
#define G(r,i,a,b,c,d) \
do { \
a = a + b + m[blake2s_sigma[r][2*i+0]]; \
d = rotr32(d ^ a, 16); \
c = c + d; \
b = rotr32(b ^ c, 12); \
a = a + b + m[blake2s_sigma[r][2*i+1]]; \
d = rotr32(d ^ a, 8); \
c = c + d; \
b = rotr32(b ^ c, 7); \
} while(0)
#define ROUND(r) \
do { \
G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
G(r,2,v[ 2],v[ 6],v[10],v[14]); \
G(r,3,v[ 3],v[ 7],v[11],v[15]); \
G(r,4,v[ 0],v[ 5],v[10],v[15]); \
G(r,5,v[ 1],v[ 6],v[11],v[12]); \
G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
} while(0)
ROUND( 0 );
ROUND( 1 );
ROUND( 2 );
ROUND( 3 );
ROUND( 4 );
ROUND( 5 );
ROUND( 6 );
ROUND( 7 );
ROUND( 8 );
ROUND( 9 );
for( size_t i = 0; i < 8; ++i )
S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
#undef G
#undef ROUND
return 0;
}
int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen )
{
while( inlen > 0 )
{
uint32_t left = S->buflen;
uint32_t fill = 2 * BLAKE2S_BLOCKBYTES - left;
if( inlen > fill )
{
memcpy( S->buf + left, in, fill ); // Fill buffer
S->buflen += fill;
blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
blake2s_compress( S, S->buf ); // Compress
memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); // Shift buffer left
S->buflen -= BLAKE2S_BLOCKBYTES;
in += fill;
inlen -= fill;
}
else // inlen <= fill
{
memcpy( S->buf + left, in, inlen );
S->buflen += ( uint32_t ) inlen; // Be lazy, do not compress
in += inlen;
inlen -= inlen;
}
}
return 0;
}
int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen )
{
uint8_t buffer[BLAKE2S_OUTBYTES];
size_t i;
if(S->outlen != outlen) return -1;
if( S->buflen > BLAKE2S_BLOCKBYTES )
{
blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
blake2s_compress( S, S->buf );
S->buflen -= BLAKE2S_BLOCKBYTES;
memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen );
}
blake2s_increment_counter( S, ( uint32_t )S->buflen );
blake2s_set_lastblock( S );
memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */
blake2s_compress( S, S->buf );
for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
store32( buffer + sizeof( S->h[i] ) * i, S->h[i] );
memcpy( out, buffer, outlen );
return 0;
}
int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
{
blake2s_state S[1];
/* Verify parameters */
if ( NULL == in && inlen > 0 ) return -1;
if ( NULL == out ) return -1;
if ( NULL == key && keylen > 0 ) return -1;
if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
if( keylen > BLAKE2S_KEYBYTES ) return -1;
if( keylen > 0 )
{
if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1;
}
else
{
if( blake2s_init( S, outlen ) < 0 ) return -1;
}
if( blake2s_update( S, ( uint8_t * )in, inlen ) < 0) return -1;
return blake2s_final( S, out, outlen );
}

View File

@ -0,0 +1,91 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#pragma once
#ifndef __BLAKE2S_ROUND_H__
#define __BLAKE2S_ROUND_H__
#define LOAD(p) _mm_load_si128( (__m128i *)(p) )
#define STORE(p,r) _mm_store_si128((__m128i *)(p), r)
#define LOADU(p) _mm_loadu_si128( (__m128i *)(p) )
#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r)
#define TOF(reg) _mm_castsi128_ps((reg))
#define TOI(reg) _mm_castps_si128((reg))
#define LIKELY(x) __builtin_expect((x),1)
/* Microarchitecture-specific macros */
#ifndef HAVE_XOP
#ifdef HAVE_SSSE3
#define _mm_roti_epi32(r, c) ( \
(8==-(c)) ? _mm_shuffle_epi8(r,r8) \
: (16==-(c)) ? _mm_shuffle_epi8(r,r16) \
: _mm_xor_si128(_mm_srli_epi32( (r), -(c) ),_mm_slli_epi32( (r), 32-(-(c)) )) )
#else
#define _mm_roti_epi32(r, c) _mm_xor_si128(_mm_srli_epi32( (r), -(c) ),_mm_slli_epi32( (r), 32-(-(c)) ))
#endif
#else
/* ... */
#endif
#define G1(row1,row2,row3,row4,buf) \
row1 = _mm_add_epi32( _mm_add_epi32( row1, buf), row2 ); \
row4 = _mm_xor_si128( row4, row1 ); \
row4 = _mm_roti_epi32(row4, -16); \
row3 = _mm_add_epi32( row3, row4 ); \
row2 = _mm_xor_si128( row2, row3 ); \
row2 = _mm_roti_epi32(row2, -12);
#define G2(row1,row2,row3,row4,buf) \
row1 = _mm_add_epi32( _mm_add_epi32( row1, buf), row2 ); \
row4 = _mm_xor_si128( row4, row1 ); \
row4 = _mm_roti_epi32(row4, -8); \
row3 = _mm_add_epi32( row3, row4 ); \
row2 = _mm_xor_si128( row2, row3 ); \
row2 = _mm_roti_epi32(row2, -7);
#define DIAGONALIZE(row1,row2,row3,row4) \
row4 = _mm_shuffle_epi32( row4, _MM_SHUFFLE(2,1,0,3) ); \
row3 = _mm_shuffle_epi32( row3, _MM_SHUFFLE(1,0,3,2) ); \
row2 = _mm_shuffle_epi32( row2, _MM_SHUFFLE(0,3,2,1) );
#define UNDIAGONALIZE(row1,row2,row3,row4) \
row4 = _mm_shuffle_epi32( row4, _MM_SHUFFLE(0,3,2,1) ); \
row3 = _mm_shuffle_epi32( row3, _MM_SHUFFLE(1,0,3,2) ); \
row2 = _mm_shuffle_epi32( row2, _MM_SHUFFLE(2,1,0,3) );
#if defined(HAVE_XOP)
#include "blake2s-load-xop.h"
#elif defined(HAVE_SSE4_1)
#include "blake2s-load-sse41.h"
#else
#include "blake2s-load-sse2.h"
#endif
#define ROUND(r) \
LOAD_MSG_ ##r ##_1(buf1); \
G1(row1,row2,row3,row4,buf1); \
LOAD_MSG_ ##r ##_2(buf2); \
G2(row1,row2,row3,row4,buf2); \
DIAGONALIZE(row1,row2,row3,row4); \
LOAD_MSG_ ##r ##_3(buf3); \
G1(row1,row2,row3,row4,buf3); \
LOAD_MSG_ ##r ##_4(buf4); \
G2(row1,row2,row3,row4,buf4); \
UNDIAGONALIZE(row1,row2,row3,row4); \
#endif

422
sys/contrib/libb2/blake2s.c Normal file
View File

@ -0,0 +1,422 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "blake2.h"
#include "blake2-impl.h"
#include "blake2-config.h"
#if defined(_MSC_VER)
#include <intrin.h>
#endif
#if defined(HAVE_SSE2)
#include <emmintrin.h>
// MSVC only defines _mm_set_epi64x for x86_64...
#if defined(_MSC_VER) && !defined(_M_X64)
static inline __m128i _mm_set_epi64x( const uint64_t u1, const uint64_t u0 )
{
return _mm_set_epi32( u1 >> 32, u1, u0 >> 32, u0 );
}
#endif
#endif
#if defined(HAVE_SSSE3)
#include <tmmintrin.h>
#endif
#if defined(HAVE_SSE4_1)
#include <smmintrin.h>
#endif
#if defined(HAVE_AVX)
#include <immintrin.h>
#endif
#if defined(HAVE_XOP) && !defined(_MSC_VER)
#include <x86intrin.h>
#endif
#include "blake2s-round.h"
static const uint32_t blake2s_IV[8] =
{
0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
};
static const uint8_t blake2s_sigma[10][16] =
{
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
};
/* Some helper functions, not necessarily useful */
static inline int blake2s_set_lastnode( blake2s_state *S )
{
S->f[1] = ~0U;
return 0;
}
static inline int blake2s_clear_lastnode( blake2s_state *S )
{
S->f[1] = 0U;
return 0;
}
static inline int blake2s_set_lastblock( blake2s_state *S )
{
if( S->last_node ) blake2s_set_lastnode( S );
S->f[0] = ~0U;
return 0;
}
static inline int blake2s_clear_lastblock( blake2s_state *S )
{
if( S->last_node ) blake2s_clear_lastnode( S );
S->f[0] = 0U;
return 0;
}
static inline int blake2s_increment_counter( blake2s_state *S, const uint32_t inc )
{
uint64_t t = ( ( uint64_t )S->t[1] << 32 ) | S->t[0];
t += inc;
S->t[0] = ( uint32_t )( t >> 0 );
S->t[1] = ( uint32_t )( t >> 32 );
return 0;
}
// Parameter-related functions
static inline int blake2s_param_set_digest_length( blake2s_param *P, const uint8_t digest_length )
{
P->digest_length = digest_length;
return 0;
}
static inline int blake2s_param_set_fanout( blake2s_param *P, const uint8_t fanout )
{
P->fanout = fanout;
return 0;
}
static inline int blake2s_param_set_max_depth( blake2s_param *P, const uint8_t depth )
{
P->depth = depth;
return 0;
}
static inline int blake2s_param_set_leaf_length( blake2s_param *P, const uint32_t leaf_length )
{
P->leaf_length = leaf_length;
return 0;
}
static inline int blake2s_param_set_node_offset( blake2s_param *P, const uint64_t node_offset )
{
store48( P->node_offset, node_offset );
return 0;
}
static inline int blake2s_param_set_node_depth( blake2s_param *P, const uint8_t node_depth )
{
P->node_depth = node_depth;
return 0;
}
static inline int blake2s_param_set_inner_length( blake2s_param *P, const uint8_t inner_length )
{
P->inner_length = inner_length;
return 0;
}
static inline int blake2s_param_set_salt( blake2s_param *P, const uint8_t salt[BLAKE2S_SALTBYTES] )
{
memcpy( P->salt, salt, BLAKE2S_SALTBYTES );
return 0;
}
static inline int blake2s_param_set_personal( blake2s_param *P, const uint8_t personal[BLAKE2S_PERSONALBYTES] )
{
memcpy( P->personal, personal, BLAKE2S_PERSONALBYTES );
return 0;
}
static inline int blake2s_init0( blake2s_state *S )
{
memset( S, 0, sizeof( blake2s_state ) );
for( int i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i];
return 0;
}
#define blake2s_init BLAKE2_IMPL_NAME(blake2s_init)
#define blake2s_init_param BLAKE2_IMPL_NAME(blake2s_init_param)
#define blake2s_init_key BLAKE2_IMPL_NAME(blake2s_init_key)
#define blake2s_update BLAKE2_IMPL_NAME(blake2s_update)
#define blake2s_final BLAKE2_IMPL_NAME(blake2s_final)
#define blake2s BLAKE2_IMPL_NAME(blake2s)
#if defined(__cplusplus)
extern "C" {
#endif
int blake2s_init( blake2s_state *S, size_t outlen );
int blake2s_init_param( blake2s_state *S, const blake2s_param *P );
int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen );
int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen );
int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen );
int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
#if defined(__cplusplus)
}
#endif
/* init2 xors IV with input parameter block */
int blake2s_init_param( blake2s_state *S, const blake2s_param *P )
{
uint8_t *p, *h, *v;
//blake2s_init0( S );
v = ( uint8_t * )( blake2s_IV );
h = ( uint8_t * )( S->h );
p = ( uint8_t * )( P );
/* IV XOR ParamBlock */
memset( S, 0, sizeof( blake2s_state ) );
for( int i = 0; i < BLAKE2S_OUTBYTES; ++i ) h[i] = v[i] ^ p[i];
S->outlen = P->digest_length;
return 0;
}
/* Some sort of default parameter block initialization, for sequential blake2s */
int blake2s_init( blake2s_state *S, size_t outlen )
{
if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;
const blake2s_param P =
{
outlen,
0,
1,
1,
0,
{0},
0,
0,
{0},
{0}
};
return blake2s_init_param( S, &P );
}
int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen )
{
if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;
if ( ( !key ) || ( !keylen ) || keylen > BLAKE2S_KEYBYTES ) return -1;
const blake2s_param P =
{
outlen,
keylen,
1,
1,
0,
{0},
0,
0,
{0},
{0}
};
if( blake2s_init_param( S, &P ) < 0 )
return -1;
{
uint8_t block[BLAKE2S_BLOCKBYTES];
memset( block, 0, BLAKE2S_BLOCKBYTES );
memcpy( block, key, keylen );
blake2s_update( S, block, BLAKE2S_BLOCKBYTES );
secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
}
return 0;
}
static inline int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] )
{
__m128i row1, row2, row3, row4;
__m128i buf1, buf2, buf3, buf4;
#if defined(HAVE_SSE4_1)
__m128i t0, t1;
#if !defined(HAVE_XOP)
__m128i t2;
#endif
#endif
__m128i ff0, ff1;
#if defined(HAVE_SSSE3) && !defined(HAVE_XOP)
const __m128i r8 = _mm_set_epi8( 12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1 );
const __m128i r16 = _mm_set_epi8( 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2 );
#endif
#if defined(HAVE_SSE4_1)
const __m128i m0 = LOADU( block + 00 );
const __m128i m1 = LOADU( block + 16 );
const __m128i m2 = LOADU( block + 32 );
const __m128i m3 = LOADU( block + 48 );
#else
const uint32_t m0 = ( ( uint32_t * )block )[ 0];
const uint32_t m1 = ( ( uint32_t * )block )[ 1];
const uint32_t m2 = ( ( uint32_t * )block )[ 2];
const uint32_t m3 = ( ( uint32_t * )block )[ 3];
const uint32_t m4 = ( ( uint32_t * )block )[ 4];
const uint32_t m5 = ( ( uint32_t * )block )[ 5];
const uint32_t m6 = ( ( uint32_t * )block )[ 6];
const uint32_t m7 = ( ( uint32_t * )block )[ 7];
const uint32_t m8 = ( ( uint32_t * )block )[ 8];
const uint32_t m9 = ( ( uint32_t * )block )[ 9];
const uint32_t m10 = ( ( uint32_t * )block )[10];
const uint32_t m11 = ( ( uint32_t * )block )[11];
const uint32_t m12 = ( ( uint32_t * )block )[12];
const uint32_t m13 = ( ( uint32_t * )block )[13];
const uint32_t m14 = ( ( uint32_t * )block )[14];
const uint32_t m15 = ( ( uint32_t * )block )[15];
#endif
row1 = ff0 = LOADU( &S->h[0] );
row2 = ff1 = LOADU( &S->h[4] );
row3 = _mm_setr_epi32( 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A );
row4 = _mm_xor_si128( _mm_setr_epi32( 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 ), LOADU( &S->t[0] ) );
ROUND( 0 );
ROUND( 1 );
ROUND( 2 );
ROUND( 3 );
ROUND( 4 );
ROUND( 5 );
ROUND( 6 );
ROUND( 7 );
ROUND( 8 );
ROUND( 9 );
STOREU( &S->h[0], _mm_xor_si128( ff0, _mm_xor_si128( row1, row3 ) ) );
STOREU( &S->h[4], _mm_xor_si128( ff1, _mm_xor_si128( row2, row4 ) ) );
return 0;
}
int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen )
{
while( inlen > 0 )
{
size_t left = S->buflen;
size_t fill = 2 * BLAKE2S_BLOCKBYTES - left;
if( inlen > fill )
{
memcpy( S->buf + left, in, fill ); // Fill buffer
S->buflen += fill;
blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
blake2s_compress( S, S->buf ); // Compress
memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); // Shift buffer left
S->buflen -= BLAKE2S_BLOCKBYTES;
in += fill;
inlen -= fill;
}
else /* inlen <= fill */
{
memcpy( S->buf + left, in, inlen );
S->buflen += inlen; // Be lazy, do not compress
in += inlen;
inlen -= inlen;
}
}
return 0;
}
int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen )
{
uint8_t buffer[BLAKE2S_OUTBYTES];
if(outlen != S->outlen ) return -1;
if( S->buflen > BLAKE2S_BLOCKBYTES )
{
blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
blake2s_compress( S, S->buf );
S->buflen -= BLAKE2S_BLOCKBYTES;
memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen );
}
blake2s_increment_counter( S, ( uint32_t )S->buflen );
blake2s_set_lastblock( S );
memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */
blake2s_compress( S, S->buf );
for( int i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
store32( buffer + sizeof( S->h[i] ) * i, S->h[i] );
memcpy( out, buffer, outlen );
return 0;
}
int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
{
blake2s_state S[1];
/* Verify parameters */
if ( NULL == in && inlen > 0 ) return -1;
if ( NULL == out ) return -1;
if ( NULL == key && keylen > 0) return -1;
if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
if( keylen > BLAKE2S_KEYBYTES ) return -1;
if( keylen > 0 )
{
if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1;
}
else
{
if( blake2s_init( S, outlen ) < 0 ) return -1;
}
if( blake2s_update( S, ( uint8_t * )in, inlen ) < 0) return -1;
return blake2s_final( S, out, outlen );
}
#if defined(SUPERCOP)
int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
{
return blake2s( out, in, NULL, BLAKE2S_OUTBYTES, (size_t)inlen, 0 );
}
#endif

View File

@ -0,0 +1,274 @@
/*
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#if defined(_OPENMP)
#include <omp.h>
#endif
#include "blake2.h"
#include "blake2-impl.h"
#define PARALLELISM_DEGREE 8
static int blake2sp_init_leaf( blake2s_state *S, uint8_t outlen, uint8_t keylen, uint64_t offset )
{
blake2s_param P[1];
P->digest_length = outlen;
P->key_length = keylen;
P->fanout = PARALLELISM_DEGREE;
P->depth = 2;
P->leaf_length = 0;
store48( P->node_offset, offset );
P->node_depth = 0;
P->inner_length = BLAKE2S_OUTBYTES;
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
blake2s_init_param( S, P );
S->outlen = P->inner_length;
return 0;
}
static int blake2sp_init_root( blake2s_state *S, uint8_t outlen, uint8_t keylen )
{
blake2s_param P[1];
P->digest_length = outlen;
P->key_length = keylen;
P->fanout = PARALLELISM_DEGREE;
P->depth = 2;
P->leaf_length = 0;
store48( P->node_offset, 0ULL );
P->node_depth = 1;
P->inner_length = BLAKE2S_OUTBYTES;
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
blake2s_init_param( S, P );
S->outlen = P->digest_length;
return 0;
}
int blake2sp_init( blake2sp_state *S, size_t outlen )
{
if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
memset( S->buf, 0, sizeof( S->buf ) );
S->buflen = 0;
if( blake2sp_init_root( S->R, ( uint8_t ) outlen, 0 ) < 0 )
return -1;
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2sp_init_leaf( S->S[i], ( uint8_t ) outlen, 0, i ) < 0 ) return -1;
S->R->last_node = 1;
S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
S->outlen = ( uint8_t ) outlen;
return 0;
}
int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen )
{
if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
if( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1;
memset( S->buf, 0, sizeof( S->buf ) );
S->buflen = 0;
if( blake2sp_init_root( S->R, ( uint8_t ) outlen, ( uint8_t ) keylen ) < 0 )
return -1;
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2sp_init_leaf( S->S[i], ( uint8_t ) outlen, ( uint8_t ) keylen, i ) < 0 )
return -1;
S->R->last_node = 1;
S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
S->outlen = ( uint8_t ) outlen;
{
uint8_t block[BLAKE2S_BLOCKBYTES];
memset( block, 0, BLAKE2S_BLOCKBYTES );
memcpy( block, key, keylen );
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
blake2s_update( S->S[i], block, BLAKE2S_BLOCKBYTES );
secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
}
return 0;
}
int blake2sp_update( blake2sp_state *S, const uint8_t *in, size_t inlen )
{
size_t left = S->buflen;
size_t fill = sizeof( S->buf ) - left;
if( left && inlen >= fill )
{
memcpy( S->buf + left, in, fill );
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
blake2s_update( S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES );
in += fill;
inlen -= fill;
left = 0;
}
#if defined(_OPENMP)
omp_set_num_threads(PARALLELISM_DEGREE);
#pragma omp parallel shared(S)
#else
for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ )
#endif
{
#if defined(_OPENMP)
size_t id__ = ( size_t ) omp_get_thread_num();
#endif
size_t inlen__ = inlen;
const uint8_t *in__ = ( const uint8_t * )in;
in__ += id__ * BLAKE2S_BLOCKBYTES;
while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES )
{
blake2s_update( S->S[id__], in__, BLAKE2S_BLOCKBYTES );
in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
}
}
in += inlen - inlen % ( PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES );
inlen %= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
if( inlen > 0 )
memcpy( S->buf + left, in, inlen );
S->buflen = ( uint32_t ) left + ( uint32_t ) inlen;
return 0;
}
int blake2sp_final( blake2sp_state *S, uint8_t *out, size_t outlen )
{
uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES];
if(S->outlen != outlen) return -1;
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
{
if( S->buflen > i * BLAKE2S_BLOCKBYTES )
{
size_t left = S->buflen - i * BLAKE2S_BLOCKBYTES;
if( left > BLAKE2S_BLOCKBYTES ) left = BLAKE2S_BLOCKBYTES;
blake2s_update( S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, left );
}
blake2s_final( S->S[i], hash[i], BLAKE2S_OUTBYTES );
}
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
blake2s_update( S->R, hash[i], BLAKE2S_OUTBYTES );
blake2s_final( S->R, out, outlen );
return 0;
}
int blake2sp( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
{
uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES];
blake2s_state S[PARALLELISM_DEGREE][1];
blake2s_state FS[1];
/* Verify parameters */
if ( NULL == in && inlen > 0 ) return -1;
if ( NULL == out ) return -1;
if ( NULL == key && keylen > 0 ) return -1;
if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
if( keylen > BLAKE2S_KEYBYTES ) return -1;
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2sp_init_leaf( S[i], ( uint8_t ) outlen, ( uint8_t ) keylen, i ) < 0 )
return -1;
S[PARALLELISM_DEGREE - 1]->last_node = 1; // mark last node
if( keylen > 0 )
{
uint8_t block[BLAKE2S_BLOCKBYTES];
memset( block, 0, BLAKE2S_BLOCKBYTES );
memcpy( block, key, keylen );
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
blake2s_update( S[i], block, BLAKE2S_BLOCKBYTES );
secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
}
#if defined(_OPENMP)
omp_set_num_threads(PARALLELISM_DEGREE);
#pragma omp parallel shared(S,hash)
#else
for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ )
#endif
{
#if defined(_OPENMP)
size_t id__ = ( size_t ) omp_get_thread_num();
#endif
size_t inlen__ = inlen;
const uint8_t *in__ = ( const uint8_t * )in;
in__ += id__ * BLAKE2S_BLOCKBYTES;
while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES )
{
blake2s_update( S[id__], in__, BLAKE2S_BLOCKBYTES );
in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
}
if( inlen__ > id__ * BLAKE2S_BLOCKBYTES )
{
const size_t left = inlen__ - id__ * BLAKE2S_BLOCKBYTES;
const size_t len = left <= BLAKE2S_BLOCKBYTES ? left : BLAKE2S_BLOCKBYTES;
blake2s_update( S[id__], in__, len );
}
blake2s_final( S[id__], hash[id__], BLAKE2S_OUTBYTES );
}
if( blake2sp_init_root( FS, ( uint8_t ) outlen, ( uint8_t ) keylen ) < 0 )
return -1;
FS->last_node = 1;
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
blake2s_update( FS, hash[i], BLAKE2S_OUTBYTES );
return blake2s_final( FS, out, outlen );
}

View File

@ -0,0 +1,163 @@
/* This file is in the public domain. */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <contrib/libb2/blake2.h>
#include <opencrypto/xform_auth.h>
extern int blake2b_init_ref(blake2b_state *S, size_t outlen);
extern int blake2b_init_param_ref(blake2b_state *S, const blake2b_param *P);
extern int blake2b_init_key_ref(blake2b_state *S, size_t outlen,
const void *key, size_t keylen);
extern int blake2b_update_ref(blake2b_state *S, const uint8_t *in,
size_t inlen);
extern int blake2b_final_ref(blake2b_state *S, uint8_t *out, size_t outlen);
extern int blake2b_ref(uint8_t *out, const void *in, const void *key,
size_t outlen, size_t inlen, size_t keylen);
extern int blake2s_init_ref(blake2s_state *S, size_t outlen);
extern int blake2s_init_param_ref(blake2s_state *S, const blake2s_param *P);
extern int blake2s_init_key_ref(blake2s_state *S, size_t outlen,
const void *key, size_t keylen);
extern int blake2s_update_ref(blake2s_state *S, const uint8_t *in,
size_t inlen);
extern int blake2s_final_ref(blake2s_state *S, uint8_t *out, size_t outlen);
extern int blake2s_ref(uint8_t *out, const void *in, const void *key,
size_t outlen, size_t inlen, size_t keylen);
struct blake2b_xform_ctx {
blake2b_state state;
uint8_t key[BLAKE2B_KEYBYTES];
uint16_t klen;
};
CTASSERT(sizeof(union authctx) >= sizeof(struct blake2b_xform_ctx));
static void
blake2b_xform_init(void *vctx)
{
struct blake2b_xform_ctx *ctx = vctx;
int rc;
if (ctx->klen > 0)
rc = blake2b_init_key_ref(&ctx->state, BLAKE2B_OUTBYTES,
ctx->key, ctx->klen);
else
rc = blake2b_init_ref(&ctx->state, BLAKE2B_OUTBYTES);
if (rc != 0)
panic("blake2b_init_key: invalid arguments");
}
static void
blake2b_xform_setkey(void *vctx, const uint8_t *key, uint16_t klen)
{
struct blake2b_xform_ctx *ctx = vctx;
if (klen > sizeof(ctx->key))
panic("invalid klen %u", (unsigned)klen);
memcpy(ctx->key, key, klen);
ctx->klen = klen;
}
static int
blake2b_xform_update(void *vctx, const uint8_t *data, uint16_t len)
{
struct blake2b_xform_ctx *ctx = vctx;
int rc;
rc = blake2b_update_ref(&ctx->state, data, len);
if (rc != 0)
return (EINVAL);
return (0);
}
static void
blake2b_xform_final(uint8_t *out, void *vctx)
{
struct blake2b_xform_ctx *ctx = vctx;
int rc;
rc = blake2b_final_ref(&ctx->state, out, BLAKE2B_OUTBYTES);
if (rc != 0)
panic("blake2b_final: invalid");
}
struct auth_hash auth_hash_blake2b = {
.type = CRYPTO_BLAKE2B,
.name = "Blake2b",
.keysize = BLAKE2B_KEYBYTES,
.hashsize = BLAKE2B_OUTBYTES,
.ctxsize = sizeof(struct blake2b_xform_ctx),
.Setkey = blake2b_xform_setkey,
.Init = blake2b_xform_init,
.Update = blake2b_xform_update,
.Final = blake2b_xform_final,
};
struct blake2s_xform_ctx {
blake2s_state state;
uint8_t key[BLAKE2S_KEYBYTES];
uint16_t klen;
};
CTASSERT(sizeof(union authctx) >= sizeof(struct blake2s_xform_ctx));
static void
blake2s_xform_init(void *vctx)
{
struct blake2s_xform_ctx *ctx = vctx;
int rc;
if (ctx->klen > 0)
rc = blake2s_init_key_ref(&ctx->state, BLAKE2S_OUTBYTES,
ctx->key, ctx->klen);
else
rc = blake2s_init_ref(&ctx->state, BLAKE2S_OUTBYTES);
if (rc != 0)
panic("blake2s_init_key: invalid arguments");
}
static void
blake2s_xform_setkey(void *vctx, const uint8_t *key, uint16_t klen)
{
struct blake2s_xform_ctx *ctx = vctx;
if (klen > sizeof(ctx->key))
panic("invalid klen %u", (unsigned)klen);
memcpy(ctx->key, key, klen);
ctx->klen = klen;
}
static int
blake2s_xform_update(void *vctx, const uint8_t *data, uint16_t len)
{
struct blake2s_xform_ctx *ctx = vctx;
int rc;
rc = blake2s_update_ref(&ctx->state, data, len);
if (rc != 0)
return (EINVAL);
return (0);
}
static void
blake2s_xform_final(uint8_t *out, void *vctx)
{
struct blake2s_xform_ctx *ctx = vctx;
int rc;
rc = blake2s_final_ref(&ctx->state, out, BLAKE2S_OUTBYTES);
if (rc != 0)
panic("blake2s_final: invalid");
}
struct auth_hash auth_hash_blake2s = {
.type = CRYPTO_BLAKE2S,
.name = "Blake2s",
.keysize = BLAKE2S_KEYBYTES,
.hashsize = BLAKE2S_OUTBYTES,
.ctxsize = sizeof(struct blake2s_xform_ctx),
.Setkey = blake2s_xform_setkey,
.Init = blake2s_xform_init,
.Update = blake2s_xform_update,
.Final = blake2s_xform_final,
};

View File

@ -0,0 +1,538 @@
/*-
* Copyright (c) 2018 Conrad Meyer <cem@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/kobj.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/rwlock.h>
#include <sys/smp.h>
#include <blake2.h>
#include <opencrypto/cryptodev.h>
#include <cryptodev_if.h>
#if defined(__amd64__)
#include <machine/fpu.h>
#elif defined(__i386__)
#include <machine/npx.h>
#endif
struct blake2_session {
int algo;
size_t klen;
size_t mlen;
uint8_t key[BLAKE2B_KEYBYTES];
bool used;
uint32_t id;
TAILQ_ENTRY(blake2_session) next;
};
CTASSERT((size_t)BLAKE2B_KEYBYTES > (size_t)BLAKE2S_KEYBYTES);
struct blake2_softc {
bool dying;
int32_t cid;
uint32_t sid;
TAILQ_HEAD(blake2_sessions_head, blake2_session) sessions;
struct rwlock lock;
};
static struct mtx_padalign *ctx_mtx;
static struct fpu_kern_ctx **ctx_fpu;
#define ACQUIRE_CTX(i, ctx) \
do { \
(i) = PCPU_GET(cpuid); \
mtx_lock(&ctx_mtx[(i)]); \
(ctx) = ctx_fpu[(i)]; \
} while (0)
#define RELEASE_CTX(i, ctx) \
do { \
mtx_unlock(&ctx_mtx[(i)]); \
(i) = -1; \
(ctx) = NULL; \
} while (0)
static int blake2_newsession(device_t, uint32_t *sidp, struct cryptoini *cri);
static int blake2_freesession(device_t, uint64_t tid);
static void blake2_freesession_locked(struct blake2_softc *sc,
struct blake2_session *ses);
static int blake2_cipher_setup(struct blake2_session *ses,
struct cryptoini *authini);
static int blake2_cipher_process(struct blake2_session *ses,
struct cryptop *crp);
MALLOC_DEFINE(M_BLAKE2, "blake2_data", "Blake2 Data");
static void
blake2_identify(driver_t *drv, device_t parent)
{
/* NB: order 10 is so we get attached after h/w devices */
if (device_find_child(parent, "blaketwo", -1) == NULL &&
BUS_ADD_CHILD(parent, 10, "blaketwo", -1) == 0)
panic("blaketwo: could not attach");
}
static int
blake2_probe(device_t dev)
{
device_set_desc(dev, "Blake2");
return (0);
}
static void
blake2_cleanctx(void)
{
int i;
/* XXX - no way to return driverid */
CPU_FOREACH(i) {
if (ctx_fpu[i] != NULL) {
mtx_destroy(&ctx_mtx[i]);
fpu_kern_free_ctx(ctx_fpu[i]);
}
ctx_fpu[i] = NULL;
}
free(ctx_mtx, M_BLAKE2);
ctx_mtx = NULL;
free(ctx_fpu, M_BLAKE2);
ctx_fpu = NULL;
}
static int
blake2_attach(device_t dev)
{
struct blake2_softc *sc;
int i;
sc = device_get_softc(dev);
sc->dying = false;
TAILQ_INIT(&sc->sessions);
sc->sid = 1;
sc->cid = crypto_get_driverid(dev, CRYPTOCAP_F_HARDWARE |
CRYPTOCAP_F_SYNC);
if (sc->cid < 0) {
device_printf(dev, "Could not get crypto driver id.\n");
return (ENOMEM);
}
ctx_mtx = malloc(sizeof(*ctx_mtx) * (mp_maxid + 1), M_BLAKE2,
M_WAITOK | M_ZERO);
ctx_fpu = malloc(sizeof(*ctx_fpu) * (mp_maxid + 1), M_BLAKE2,
M_WAITOK | M_ZERO);
CPU_FOREACH(i) {
ctx_fpu[i] = fpu_kern_alloc_ctx(0);
mtx_init(&ctx_mtx[i], "bl2fpumtx", NULL, MTX_DEF | MTX_NEW);
}
rw_init(&sc->lock, "blake2_lock");
crypto_register(sc->cid, CRYPTO_BLAKE2B, 0, 0);
crypto_register(sc->cid, CRYPTO_BLAKE2S, 0, 0);
return (0);
}
static int
blake2_detach(device_t dev)
{
struct blake2_softc *sc;
struct blake2_session *ses;
sc = device_get_softc(dev);
rw_wlock(&sc->lock);
TAILQ_FOREACH(ses, &sc->sessions, next) {
if (ses->used) {
rw_wunlock(&sc->lock);
device_printf(dev,
"Cannot detach, sessions still active.\n");
return (EBUSY);
}
}
sc->dying = true;
while ((ses = TAILQ_FIRST(&sc->sessions)) != NULL) {
TAILQ_REMOVE(&sc->sessions, ses, next);
free(ses, M_BLAKE2);
}
rw_wunlock(&sc->lock);
crypto_unregister_all(sc->cid);
rw_destroy(&sc->lock);
blake2_cleanctx();
return (0);
}
static int
blake2_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri)
{
struct blake2_softc *sc;
struct blake2_session *ses;
struct cryptoini *authini;
int error;
if (sidp == NULL || cri == NULL) {
CRYPTDEB("no sidp or cri");
return (EINVAL);
}
sc = device_get_softc(dev);
ses = NULL;
authini = NULL;
for (; cri != NULL; cri = cri->cri_next) {
switch (cri->cri_alg) {
case CRYPTO_BLAKE2B:
case CRYPTO_BLAKE2S:
if (authini != NULL) {
CRYPTDEB("authini already set");
return (EINVAL);
}
authini = cri;
break;
default:
CRYPTDEB("unhandled algorithm");
return (EINVAL);
}
}
if (authini == NULL) {
CRYPTDEB("no cipher");
return (EINVAL);
}
rw_wlock(&sc->lock);
if (sc->dying) {
rw_wunlock(&sc->lock);
return (EINVAL);
}
/*
* Free sessions are inserted at the head of the list. So if the first
* session is used, none are free and we must allocate a new one.
*/
ses = TAILQ_FIRST(&sc->sessions);
if (ses == NULL || ses->used) {
ses = malloc(sizeof(*ses), M_BLAKE2, M_NOWAIT | M_ZERO);
if (ses == NULL) {
rw_wunlock(&sc->lock);
return (ENOMEM);
}
ses->id = sc->sid++;
} else {
TAILQ_REMOVE(&sc->sessions, ses, next);
}
ses->used = true;
TAILQ_INSERT_TAIL(&sc->sessions, ses, next);
rw_wunlock(&sc->lock);
ses->algo = authini->cri_alg;
error = blake2_cipher_setup(ses, authini);
if (error != 0) {
CRYPTDEB("setup failed");
rw_wlock(&sc->lock);
blake2_freesession_locked(sc, ses);
rw_wunlock(&sc->lock);
return (error);
}
*sidp = ses->id;
return (0);
}
static void
blake2_freesession_locked(struct blake2_softc *sc, struct blake2_session *ses)
{
uint32_t sid;
rw_assert(&sc->lock, RA_WLOCKED);
sid = ses->id;
TAILQ_REMOVE(&sc->sessions, ses, next);
explicit_bzero(ses, sizeof(*ses));
ses->id = sid;
TAILQ_INSERT_HEAD(&sc->sessions, ses, next);
}
static int
blake2_freesession(device_t dev, uint64_t tid)
{
struct blake2_softc *sc;
struct blake2_session *ses;
uint32_t sid;
sc = device_get_softc(dev);
sid = ((uint32_t)tid) & 0xffffffff;
rw_wlock(&sc->lock);
TAILQ_FOREACH_REVERSE(ses, &sc->sessions, blake2_sessions_head, next) {
if (ses->id == sid)
break;
}
if (ses == NULL) {
rw_wunlock(&sc->lock);
return (EINVAL);
}
blake2_freesession_locked(sc, ses);
rw_wunlock(&sc->lock);
return (0);
}
static int
blake2_process(device_t dev, struct cryptop *crp, int hint __unused)
{
struct blake2_softc *sc;
struct blake2_session *ses;
struct cryptodesc *crd, *authcrd;
int error;
sc = device_get_softc(dev);
ses = NULL;
error = 0;
authcrd = NULL;
/* Sanity check. */
if (crp == NULL)
return (EINVAL);
if (crp->crp_callback == NULL || crp->crp_desc == NULL) {
error = EINVAL;
goto out;
}
for (crd = crp->crp_desc; crd != NULL; crd = crd->crd_next) {
switch (crd->crd_alg) {
case CRYPTO_BLAKE2B:
case CRYPTO_BLAKE2S:
if (authcrd != NULL) {
error = EINVAL;
goto out;
}
authcrd = crd;
break;
default:
error = EINVAL;
goto out;
}
}
rw_rlock(&sc->lock);
TAILQ_FOREACH_REVERSE(ses, &sc->sessions, blake2_sessions_head, next) {
if (ses->id == (crp->crp_sid & 0xffffffff))
break;
}
rw_runlock(&sc->lock);
if (ses == NULL) {
error = EINVAL;
goto out;
}
error = blake2_cipher_process(ses, crp);
if (error != 0)
goto out;
out:
crp->crp_etype = error;
crypto_done(crp);
return (error);
}
static device_method_t blake2_methods[] = {
DEVMETHOD(device_identify, blake2_identify),
DEVMETHOD(device_probe, blake2_probe),
DEVMETHOD(device_attach, blake2_attach),
DEVMETHOD(device_detach, blake2_detach),
DEVMETHOD(cryptodev_newsession, blake2_newsession),
DEVMETHOD(cryptodev_freesession, blake2_freesession),
DEVMETHOD(cryptodev_process, blake2_process),
DEVMETHOD_END
};
static driver_t blake2_driver = {
"blaketwo",
blake2_methods,
sizeof(struct blake2_softc),
};
static devclass_t blake2_devclass;
DRIVER_MODULE(blake2, nexus, blake2_driver, blake2_devclass, 0, 0);
MODULE_VERSION(blake2, 1);
MODULE_DEPEND(blake2, crypto, 1, 1, 1);
static int
blake2_cipher_setup(struct blake2_session *ses, struct cryptoini *authini)
{
int keylen;
CTASSERT((size_t)BLAKE2S_OUTBYTES <= (size_t)BLAKE2B_OUTBYTES);
if (authini->cri_mlen < 0)
return (EINVAL);
switch (ses->algo) {
case CRYPTO_BLAKE2S:
if (authini->cri_mlen != 0 &&
authini->cri_mlen > BLAKE2S_OUTBYTES)
return (EINVAL);
/* FALLTHROUGH */
case CRYPTO_BLAKE2B:
if (authini->cri_mlen != 0 &&
authini->cri_mlen > BLAKE2B_OUTBYTES)
return (EINVAL);
if (authini->cri_klen % 8 != 0)
return (EINVAL);
keylen = authini->cri_klen / 8;
if (keylen > sizeof(ses->key) ||
(ses->algo == CRYPTO_BLAKE2S && keylen > BLAKE2S_KEYBYTES))
return (EINVAL);
ses->klen = keylen;
memcpy(ses->key, authini->cri_key, keylen);
ses->mlen = authini->cri_mlen;
}
return (0);
}
static int
blake2b_applicator(void *state, void *buf, u_int len)
{
int rc;
rc = blake2b_update(state, buf, len);
if (rc != 0)
return (EINVAL);
return (0);
}
static int
blake2s_applicator(void *state, void *buf, u_int len)
{
int rc;
rc = blake2s_update(state, buf, len);
if (rc != 0)
return (EINVAL);
return (0);
}
static int
blake2_cipher_process(struct blake2_session *ses, struct cryptop *crp)
{
union {
blake2b_state sb;
blake2s_state ss;
} bctx;
char res[BLAKE2B_OUTBYTES];
struct fpu_kern_ctx *ctx;
int ctxidx;
bool kt;
struct cryptodesc *crd;
int error, rc;
size_t hashlen;
crd = crp->crp_desc;
ctx = NULL;
ctxidx = 0;
error = EINVAL;
kt = is_fpu_kern_thread(0);
if (!kt) {
ACQUIRE_CTX(ctxidx, ctx);
fpu_kern_enter(curthread, ctx,
FPU_KERN_NORMAL | FPU_KERN_KTHR);
}
if (crd->crd_flags != 0)
goto out;
switch (ses->algo) {
case CRYPTO_BLAKE2B:
if (ses->mlen != 0)
hashlen = ses->mlen;
else
hashlen = BLAKE2B_OUTBYTES;
if (ses->klen > 0)
rc = blake2b_init_key(&bctx.sb, hashlen, ses->key, ses->klen);
else
rc = blake2b_init(&bctx.sb, hashlen);
if (rc != 0)
goto out;
error = crypto_apply(crp->crp_flags, crp->crp_buf, crd->crd_skip,
crd->crd_len, blake2b_applicator, &bctx.sb);
if (error != 0)
goto out;
rc = blake2b_final(&bctx.sb, res, hashlen);
if (rc != 0) {
error = EINVAL;
goto out;
}
break;
case CRYPTO_BLAKE2S:
if (ses->mlen != 0)
hashlen = ses->mlen;
else
hashlen = BLAKE2S_OUTBYTES;
if (ses->klen > 0)
rc = blake2s_init_key(&bctx.ss, hashlen, ses->key, ses->klen);
else
rc = blake2s_init(&bctx.ss, hashlen);
if (rc != 0)
goto out;
error = crypto_apply(crp->crp_flags, crp->crp_buf, crd->crd_skip,
crd->crd_len, blake2s_applicator, &bctx.ss);
if (error != 0)
goto out;
rc = blake2s_final(&bctx.ss, res, hashlen);
if (rc != 0) {
error = EINVAL;
goto out;
}
break;
default:
panic("unreachable");
}
crypto_copyback(crp->crp_flags, crp->crp_buf, crd->crd_inject, hashlen,
(void *)res);
out:
if (!kt) {
fpu_kern_leave(curthread, ctx);
RELEASE_CTX(ctxidx, ctx);
}
return (error);
}

View File

@ -0,0 +1,7 @@
/* This file is in the public domain */
/* $FreeBSD$ */
#pragma once
#include <sys/param.h>
#include <sys/systm.h> /* memcpy, memset */

View File

@ -0,0 +1,3 @@
/* This file is in the public domain. */
/* $FreeBSD$ */
#include "blake2b.c"

View File

@ -0,0 +1,3 @@
/* This file is in the public domain. */
/* $FreeBSD$ */
#include "blake2b.c"

View File

@ -0,0 +1,3 @@
/* This file is in the public domain. */
/* $FreeBSD$ */
#include "blake2b.c"

View File

@ -0,0 +1,3 @@
/* This file is in the public domain. */
/* $FreeBSD$ */
#include "blake2b.c"

View File

@ -0,0 +1,3 @@
/* This file is in the public domain. */
/* $FreeBSD$ */
#include "blake2b.c"

View File

@ -0,0 +1,3 @@
/* This file is in the public domain. */
/* $FreeBSD$ */
#include "blake2s.c"

View File

@ -0,0 +1,3 @@
/* This file is in the public domain. */
/* $FreeBSD$ */
#include "blake2s.c"

View File

@ -0,0 +1,3 @@
/* This file is in the public domain. */
/* $FreeBSD$ */
#include "blake2s.c"

View File

@ -0,0 +1,3 @@
/* This file is in the public domain. */
/* $FreeBSD$ */
#include "blake2s.c"

View File

@ -0,0 +1,3 @@
/* This file is in the public domain. */
/* $FreeBSD$ */
#include "blake2s.c"

View File

@ -0,0 +1,19 @@
/* This file is in the public domain */
/* $FreeBSD$ */
#pragma once
#include <sys/endian.h>
#if _BYTE_ORDER == _LITTLE_ENDIAN
#define NATIVE_LITTLE_ENDIAN 1
#else
/* #undef NATIVE_LITTLE_ENDIAN */
#endif
#if defined(__ARM_FEATURE_UNALIGNED) \
|| defined(__i386__) || defined(__x86_64__) \
|| defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
/* #undef HAVE_ALIGNED_ACCESS_REQUIRED */
#else
#define HAVE_ALIGNED_ACCESS_REQUIRED 1
#endif

View File

@ -0,0 +1,3 @@
/* This file is in the public domain */
/* $FreeBSD$ */
#include "blake2_kfreebsd.h"

View File

@ -0,0 +1,3 @@
/* This file is in the public domain */
/* $FreeBSD$ */
#include "blake2_kfreebsd.h"

View File

@ -0,0 +1,3 @@
/* This file is in the public domain */
/* $FreeBSD$ */
#include "blake2_kfreebsd.h"

View File

@ -0,0 +1,3 @@
/* This file is in the public domain */
/* $FreeBSD$ */
#include "blake2_kfreebsd.h"

View File

@ -0,0 +1,3 @@
/* This file is in the public domain */
/* $FreeBSD$ */
#include "blake2_kfreebsd.h"

View File

@ -68,6 +68,7 @@ SUBDIR= \
${_bxe} \
${_bios} \
${_bktr} \
${_blake2} \
${_bm} \
bnxt \
bridgestp \
@ -635,6 +636,9 @@ _amdsmn= amdsmn
_amdtemp= amdtemp
_arcmsr= arcmsr
_asmc= asmc
.if ${MK_CRYPT} != "no" || defined(ALL_MODULES)
_blake2= blake2
.endif
_bytgpio= bytgpio
_chvgpio= chvgpio
_ciss= ciss

View File

@ -0,0 +1,90 @@
# $FreeBSD$
.PATH: ${SRCTOP}/sys/contrib/libb2
.PATH: ${SRCTOP}/sys/crypto/blake2
.PATH: ${SRCTOP}/sys/opencrypto
.PATH: ${SRCTOP}/contrib/llvm/tools/clang/lib/Headers
KMOD = blake2
# Vendor sources
SRCS += blake2.h
SRCS += blake2-impl.h
SRCS += blake2-config.h
SRCS += blake2-dispatch.c
SRCS += blake2bp.c
SRCS += blake2sp.c
CFLAGS += -I${SRCTOP}/sys/contrib/libb2
CFLAGS += -I${SRCTOP}/sys/crypto/blake2
# x86 specific optimization headers:
SRCS += blake2b-load-sse2.h
SRCS += blake2b-load-sse41.h
SRCS += blake2b-round.h
SRCS += blake2s-load-sse2.h
SRCS += blake2s-load-sse41.h
SRCS += blake2s-load-xop.h
SRCS += blake2s-round.h
# C files to build a dispatched fat implementation on x86
SRCS_IN += blake2b-sse2.c
SRCS_IN += blake2b-ssse3.c
SRCS_IN += blake2b-sse41.c
SRCS_IN += blake2b-avx.c
SRCS_IN += blake2b-xop.c
SRCS_IN += blake2s-sse2.c
SRCS_IN += blake2s-ssse3.c
SRCS_IN += blake2s-sse41.c
SRCS_IN += blake2s-avx.c
SRCS_IN += blake2s-xop.c
OBJS+= ${SRCS_IN:S/.c/.o/g}
CFLAGS.blake2b-sse2.c += -DSUFFIX=_sse2 -msse2
CFLAGS.blake2s-sse2.c += -DSUFFIX=_sse2 -msse2
CFLAGS.blake2b-ssse3.c += -DSUFFIX=_ssse3 -msse2 -mssse3
CFLAGS.blake2s-ssse3.c += -DSUFFIX=_ssse3 -msse2 -mssse3
CFLAGS.blake2b-sse41.c += -DSUFFIX=_sse41 -msse2 -mssse3 -msse4.1
CFLAGS.blake2s-sse41.c += -DSUFFIX=_sse41 -msse2 -mssse3 -msse4.1
CFLAGS.blake2b-avx.c += -DSUFFIX=_avx -msse2 -mssse3 -msse4.1 -mavx
CFLAGS.blake2s-avx.c += -DSUFFIX=_avx -msse2 -mssse3 -msse4.1 -mavx
CFLAGS.blake2b-xop.c += -DSUFFIX=_xop -msse2 -mssse3 -msse4.1 -mavx -mxop
CFLAGS.blake2s-xop.c += -DSUFFIX=_xop -msse2 -mssse3 -msse4.1 -mavx -mxop
.for src in ${SRCS_IN}
${src:S/.c/.o/}: ${src}
${CC} -c ${CFLAGS:N-nostdinc} ${CFLAGS.${src}} ${WERROR} ${PROF} \
-D_MM_MALLOC_H_INCLUDED ${.IMPSRC}
${CTFCONVERT_CMD}
${src:S/.c/.o/}: intrin.h emmintrin.h tmmintrin.h smmintrin.h immintrin.h \
x86intrin.h ${SRCS:M*.h}
.endfor
# FreeBSD-specific sources:
SRCS += blake2_kfreebsd.h
SRCS += config.h
SRCS += stddef.h
SRCS += stdint.h
SRCS += stdio.h
SRCS += stdlib.h
SRCS += string.h
SRCS += blake2_cryptodev.c
SRCS += opt_param.h cryptodev_if.h
WARNS ?= 6
.include <bsd.kmod.mk>
CWARNFLAGS.blake2-dispatch.c += -Wno-error=unused-const-variable
CWARNFLAGS += ${NO_WCAST_QUAL}

View File

@ -9,6 +9,8 @@
.PATH: ${SRCTOP}/sys/crypto/sha2
.PATH: ${SRCTOP}/sys/crypto/siphash
.PATH: ${SRCTOP}/sys/crypto/skein
.PATH: ${SRCTOP}/sys/crypto/blake2
.PATH: ${SRCTOP}/sys/contrib/libb2
KMOD = crypto
SRCS = crypto.c cryptodev_if.c
@ -31,6 +33,14 @@ AFLAGS+= --defsym SKEIN_LOOP=0
.endif
SRCS += siphash.c
SRCS += gmac.c gfmult.c
SRCS += blake2b-ref.c
SRCS += blake2s-ref.c
SRCS += blake2-sw.c
CFLAGS.blake2b-ref.c += -I${SRCTOP}/sys/crypto/blake2 -DSUFFIX=_ref
CFLAGS.blake2s-ref.c += -I${SRCTOP}/sys/crypto/blake2 -DSUFFIX=_ref
CFLAGS.blake2-sw.c += -I${SRCTOP}/sys/crypto/blake2
CWARNFLAGS.blake2b-ref.c += ${NO_WCAST_QUAL}
CWARNFLAGS.blake2s-ref.c += ${NO_WCAST_QUAL}
SRCS += opt_param.h cryptodev_if.h bus_if.h device_if.h
SRCS += opt_ddb.h

View File

@ -492,6 +492,14 @@ cryptof_ioctl(
case CRYPTO_NULL_HMAC:
thash = &auth_hash_null;
break;
case CRYPTO_BLAKE2B:
thash = &auth_hash_blake2b;
break;
case CRYPTO_BLAKE2S:
thash = &auth_hash_blake2s;
break;
default:
CRYPTDEB("invalid mac");
SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__);

View File

@ -178,7 +178,9 @@
#define CRYPTO_AES_128_NIST_GMAC 26 /* auth side */
#define CRYPTO_AES_192_NIST_GMAC 27 /* auth side */
#define CRYPTO_AES_256_NIST_GMAC 28 /* auth side */
#define CRYPTO_ALGORITHM_MAX 28 /* Keep updated - see below */
#define CRYPTO_BLAKE2B 29 /* Blake2b hash */
#define CRYPTO_BLAKE2S 30 /* Blake2s hash */
#define CRYPTO_ALGORITHM_MAX 30 /* Keep updated - see below */
#define CRYPTO_ALGO_VALID(x) ((x) >= CRYPTO_ALGORITHM_MIN && \
(x) <= CRYPTO_ALGORITHM_MAX)

View File

@ -372,6 +372,11 @@ swcr_authprepare(struct auth_hash *axf, struct swcr_data *sw, u_char *key,
axf->Final(buf, sw->sw_ictx);
break;
}
case CRYPTO_BLAKE2B:
case CRYPTO_BLAKE2S:
axf->Setkey(sw->sw_ictx, key, klen);
axf->Init(sw->sw_ictx);
break;
default:
printf("%s: CRD_F_KEY_EXPLICIT flag given, but algorithm %d "
"doesn't use keys.\n", __func__, axf->type);
@ -438,6 +443,8 @@ swcr_authcompute(struct cryptodesc *crd, struct swcr_data *sw, caddr_t buf,
axf->Final(aalg, &ctx);
break;
case CRYPTO_BLAKE2B:
case CRYPTO_BLAKE2S:
case CRYPTO_NULL_HMAC:
axf->Final(aalg, &ctx);
break;
@ -946,6 +953,25 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri)
(*swd)->sw_axf = axf;
break;
case CRYPTO_BLAKE2B:
axf = &auth_hash_blake2b;
goto auth5common;
case CRYPTO_BLAKE2S:
axf = &auth_hash_blake2s;
auth5common:
(*swd)->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA,
M_NOWAIT);
if ((*swd)->sw_ictx == NULL) {
swcr_freesession_locked(dev, i);
rw_runlock(&swcr_sessions_lock);
return ENOBUFS;
}
axf->Setkey((*swd)->sw_ictx, cri->cri_key,
cri->cri_klen / 8);
axf->Init((*swd)->sw_ictx);
(*swd)->sw_axf = axf;
break;
case CRYPTO_DEFLATE_COMP:
cxf = &comp_algo_deflate;
(*swd)->sw_cxf = cxf;
@ -1049,6 +1075,8 @@ swcr_freesession_locked(device_t dev, u_int64_t tid)
}
break;
case CRYPTO_BLAKE2B:
case CRYPTO_BLAKE2S:
case CRYPTO_MD5:
case CRYPTO_SHA1:
axf = swd->sw_axf;
@ -1155,6 +1183,8 @@ swcr_process(device_t dev, struct cryptop *crp, int hint)
case CRYPTO_SHA1_KPDK:
case CRYPTO_MD5:
case CRYPTO_SHA1:
case CRYPTO_BLAKE2B:
case CRYPTO_BLAKE2S:
if ((crp->crp_etype = swcr_authcompute(crd, sw,
crp->crp_buf, crp->crp_flags)) != 0)
goto done;
@ -1246,6 +1276,8 @@ swcr_attach(device_t dev)
REGISTER(CRYPTO_AES_256_NIST_GMAC);
REGISTER(CRYPTO_CAMELLIA_CBC);
REGISTER(CRYPTO_DEFLATE_COMP);
REGISTER(CRYPTO_BLAKE2B);
REGISTER(CRYPTO_BLAKE2S);
#undef REGISTER
return 0;

View File

@ -75,6 +75,8 @@ extern struct auth_hash auth_hash_hmac_sha2_512;
extern struct auth_hash auth_hash_nist_gmac_aes_128;
extern struct auth_hash auth_hash_nist_gmac_aes_192;
extern struct auth_hash auth_hash_nist_gmac_aes_256;
extern struct auth_hash auth_hash_blake2b;
extern struct auth_hash auth_hash_blake2s;
union authctx {
MD5_CTX md5ctx;

View File

@ -5,6 +5,12 @@ PACKAGE= tests
TESTSDIR= ${TESTSBASE}/sys/opencrypto
BINDIR= ${TESTSDIR}
CFLAGS+= -I${SRCTOP}/tests
CFLAGS.blake2_test.c += -I${SRCTOP}/sys/opencrypto
CFLAGS.blake2_test.c += -I${SRCTOP}/sys/contrib/libb2
ATF_TESTS_C+= blake2_test
PLAIN_TESTS_SH= runtests
TEST_METADATA.runtests+= required_programs="python"
@ -14,4 +20,6 @@ PYMODULES= cryptodev.py cryptodevh.py cryptotest.py dpkt.py
${PACKAGE}FILES+= ${PYMODULES}
WARNS?= 6
.include <bsd.test.mk>

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,209 @@
/*-
* Copyright (c) 2018 Conrad Meyer <cem@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
/*
* Derived from blake2b-test.c and blake2s-test.c:
*
* BLAKE2 reference source code package - optimized C implementations
*
* Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
*
* To the extent possible under law, the author(s) have dedicated all copyright
* and related and neighboring rights to this software to the public domain
* worldwide. This software is distributed without any warranty.
*
* You should have received a copy of the CC0 Public Domain Dedication along with
* this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include <sys/param.h>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <atf-c.h>
/* Be sure to include tree copy rather than system copy. */
#include "cryptodev.h"
#include "freebsd_test_suite/macros.h"
#include <blake2.h>
#include "blake2-kat.h"
static uint8_t key2b[BLAKE2B_KEYBYTES];
static uint8_t key2s[BLAKE2S_KEYBYTES];
static uint8_t katbuf[KAT_LENGTH];
static void
initialize_constant_buffers(void)
{
size_t i;
for (i = 0; i < sizeof(key2b); i++)
key2b[i] = (uint8_t)i;
for (i = 0; i < sizeof(key2s); i++)
key2s[i] = (uint8_t)i;
for (i = 0; i < sizeof(katbuf); i++)
katbuf[i] = (uint8_t)i;
}
static int
get_handle_fd(void)
{
int dc_fd, fd;
dc_fd = open("/dev/crypto", O_RDWR);
/*
* Why do we do this dance instead of just operating on /dev/crypto
* directly? I have no idea.
*/
ATF_REQUIRE(dc_fd >= 0);
ATF_REQUIRE(ioctl(dc_fd, CRIOGET, &fd) != -1);
close(dc_fd);
return (fd);
}
static int
create_session(int fd, int alg, int crid, const void *key, size_t klen)
{
struct session2_op sop;
memset(&sop, 0, sizeof(sop));
sop.mac = alg;
sop.mackey = key;
sop.mackeylen = klen;
sop.crid = crid;
ATF_REQUIRE_MSG(ioctl(fd, CIOCGSESSION2, &sop) >= 0,
"alg %d keylen %zu, errno=%d (%s)", alg, klen, errno,
strerror(errno));
return (sop.ses);
}
static void
do_cryptop(int fd, int ses, size_t inlen, void *out)
{
struct crypt_op cop;
memset(&cop, 0, sizeof(cop));
cop.ses = ses;
cop.len = inlen;
cop.src = katbuf;
cop.mac = out;
ATF_CHECK_MSG(ioctl(fd, CIOCCRYPT, &cop) >= 0, "ioctl(CIOCCRYPT)");
}
static void
test_blake2b_vectors(int crid, const char *modname)
{
uint8_t hash[BLAKE2B_OUTBYTES];
int fd, ses;
size_t i;
ATF_REQUIRE_KERNEL_MODULE(modname);
ATF_REQUIRE_KERNEL_MODULE("cryptodev");
initialize_constant_buffers();
fd = get_handle_fd();
ses = create_session(fd, CRYPTO_BLAKE2B, crid, key2b, sizeof(key2b));
for (i = 0; i < sizeof(katbuf); i++) {
do_cryptop(fd, ses, i, hash);
ATF_CHECK_EQ_MSG(
memcmp(hash, blake2b_keyed_kat[i], sizeof(hash)),
0,
"different at %zu", i);
}
}
static void
test_blake2s_vectors(int crid, const char *modname)
{
uint8_t hash[BLAKE2S_OUTBYTES];
int fd, ses;
size_t i;
ATF_REQUIRE_KERNEL_MODULE(modname);
ATF_REQUIRE_KERNEL_MODULE("cryptodev");
initialize_constant_buffers();
fd = get_handle_fd();
ses = create_session(fd, CRYPTO_BLAKE2S, crid, key2s, sizeof(key2s));
for (i = 0; i < sizeof(katbuf); i++) {
do_cryptop(fd, ses, i, hash);
ATF_CHECK_EQ_MSG(
memcmp(hash, blake2s_keyed_kat[i], sizeof(hash)),
0,
"different at %zu", i);
}
}
ATF_TC_WITHOUT_HEAD(blake2b_vectors);
ATF_TC_BODY(blake2b_vectors, tc)
{
test_blake2b_vectors(CRYPTO_FLAG_SOFTWARE, "nexus/cryptosoft");
}
ATF_TC_WITHOUT_HEAD(blake2s_vectors);
ATF_TC_BODY(blake2s_vectors, tc)
{
test_blake2s_vectors(CRYPTO_FLAG_SOFTWARE, "nexus/cryptosoft");
}
#if defined(__i386__) || defined(__amd64__)
ATF_TC_WITHOUT_HEAD(blake2b_vectors_x86);
ATF_TC_BODY(blake2b_vectors_x86, tc)
{
test_blake2b_vectors(CRYPTO_FLAG_HARDWARE, "nexus/blake2");
}
ATF_TC_WITHOUT_HEAD(blake2s_vectors_x86);
ATF_TC_BODY(blake2s_vectors_x86, tc)
{
test_blake2s_vectors(CRYPTO_FLAG_HARDWARE, "nexus/blake2");
}
#endif
ATF_TP_ADD_TCS(tp)
{
ATF_TP_ADD_TC(tp, blake2b_vectors);
ATF_TP_ADD_TC(tp, blake2s_vectors);
#if defined(__i386__) || defined(__amd64__)
ATF_TP_ADD_TC(tp, blake2b_vectors_x86);
ATF_TP_ADD_TC(tp, blake2s_vectors_x86);
#endif
return (atf_no_error());
}