Import Blake2 algorithms (blake2b, blake2s) from libb2
The upstream repository is on github BLAKE2/libb2. Files landed in
sys/contrib/libb2 are the unmodified upstream files, except for one
difference: secure_zero_memory's contents have been replaced with
explicit_bzero() only because the previous implementation broke powerpc
link. Preferential use of explicit_bzero() is in progress upstream, so
it is anticipated we will be able to drop this diff in the future.
sys/crypto/blake2 contains the source files needed to port libb2 to our
build system, a wrapped (limited) variant of the algorithm to match the API
of our auth_transform softcrypto abstraction, incorporation into the Open
Crypto Framework (OCF) cryptosoft(4) driver, as well as an x86 SSE/AVX
accelerated OCF driver, blake2(4).
Optimized variants of blake2 are compiled for a number of x86 machines
(anything from SSE2 to AVX + XOP). On those machines, FPU context will need
to be explicitly saved before using blake2(4)-provided algorithms directly.
Use via cryptodev / OCF saves FPU state automatically, and use via the
auth_transform softcrypto abstraction does not use FPU.
The intent of the OCF driver is mostly to enable testing in userspace via
/dev/crypto. ATF tests are added with published KAT test vectors to
validate correctness.
Reviewed by: jhb, markj
Obtained from: github BLAKE2/libb2
Differential Revision: https://reviews.freebsd.org/D14662
2018-03-21 16:18:14 +00:00
|
|
|
/*
   BLAKE2 reference source code package - optimized C implementations

   Written in 2012 by Samuel Neves <sneves@dei.uc.pt>

   To the extent possible under law, the author(s) have dedicated all copyright
   and related and neighboring rights to this software to the public domain
   worldwide. This software is distributed without any warranty.

   You should have received a copy of the CC0 Public Domain Dedication along with
   this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
|
|
|
|
#pragma once
|
|
|
|
#ifndef __BLAKE2_IMPL_H__
|
|
|
|
#define __BLAKE2_IMPL_H__
|
|
|
|
|
|
|
|
#include <stddef.h>
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include "config.h"
|
|
|
|
|
|
|
|
/*
 * Token-pasting helpers.
 *
 * BLAKE2_IMPL_CAT pastes its two arguments together verbatim.
 * BLAKE2_IMPL_EVAL adds one level of indirection so that macro arguments are
 * expanded before they are pasted.
 * BLAKE2_IMPL_NAME(fun) appends the expansion of SUFFIX to fun.  SUFFIX is not
 * defined in this header; presumably each implementation variant defines it
 * before using the macro (TODO confirm against the including sources).
 */
#define BLAKE2_IMPL_CAT(x,y) x ## y
#define BLAKE2_IMPL_EVAL(x,y)  BLAKE2_IMPL_CAT(x,y)
#define BLAKE2_IMPL_NAME(fun)  BLAKE2_IMPL_EVAL(fun, SUFFIX)
|
|
|
|
|
|
|
|
/*
 * Read a 32-bit word stored in little-endian byte order at src.
 *
 * src must reference at least 4 readable bytes; no alignment is required.
 * Returns the decoded value in host representation.
 */
static inline uint32_t load32( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN) && !defined(HAVE_ALIGNED_ACCESS_REQUIRED)
  /*
   * Host byte order already matches, so copy the bytes straight in.  memcpy
   * is used rather than dereferencing a cast pointer: it keeps the const
   * qualifier and avoids strict-aliasing and misaligned-access undefined
   * behavior.
   */
  uint32_t w;
  memcpy( &w, src, sizeof w );
  return w;
#else
  /* Portable path: assemble the word one byte at a time, lowest byte first. */
  const uint8_t *p = ( const uint8_t * )src;
  return ( ( uint32_t )p[0] <<  0 ) |
         ( ( uint32_t )p[1] <<  8 ) |
         ( ( uint32_t )p[2] << 16 ) |
         ( ( uint32_t )p[3] << 24 );
#endif
}
|
|
|
|
|
|
|
|
/*
 * Read a 64-bit word stored in little-endian byte order at src.
 *
 * src must reference at least 8 readable bytes; no alignment is required.
 * Returns the decoded value in host representation.
 */
static inline uint64_t load64( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN) && !defined(HAVE_ALIGNED_ACCESS_REQUIRED)
  /*
   * Host byte order already matches, so copy the bytes straight in.  memcpy
   * is used rather than dereferencing a cast pointer: it keeps the const
   * qualifier and avoids strict-aliasing and misaligned-access undefined
   * behavior.
   */
  uint64_t w;
  memcpy( &w, src, sizeof w );
  return w;
#else
  /* Portable path: assemble the word one byte at a time, lowest byte first. */
  const uint8_t *p = ( const uint8_t * )src;
  return ( ( uint64_t )p[0] <<  0 ) |
         ( ( uint64_t )p[1] <<  8 ) |
         ( ( uint64_t )p[2] << 16 ) |
         ( ( uint64_t )p[3] << 24 ) |
         ( ( uint64_t )p[4] << 32 ) |
         ( ( uint64_t )p[5] << 40 ) |
         ( ( uint64_t )p[6] << 48 ) |
         ( ( uint64_t )p[7] << 56 );
#endif
}
|
|
|
|
|
|
|
|
/*
 * Write the 32-bit value w to dst in little-endian byte order.
 *
 * dst must reference at least 4 writable bytes; no alignment is required.
 */
static inline void store32( void *dst, uint32_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN) && !defined(HAVE_ALIGNED_ACCESS_REQUIRED)
  /*
   * Host byte order already matches.  memcpy replaces the store through a
   * cast pointer, which was undefined for misaligned or differently-typed
   * destinations.
   */
  memcpy( dst, &w, sizeof w );
#else
  /* Portable path: emit the lowest byte first. */
  uint8_t *p = ( uint8_t * )dst;
  p[0] = ( uint8_t )( w >>  0 );
  p[1] = ( uint8_t )( w >>  8 );
  p[2] = ( uint8_t )( w >> 16 );
  p[3] = ( uint8_t )( w >> 24 );
#endif
}
|
|
|
|
|
|
|
|
/*
 * Write the 64-bit value w to dst in little-endian byte order.
 *
 * dst must reference at least 8 writable bytes; no alignment is required.
 */
static inline void store64( void *dst, uint64_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN) && !defined(HAVE_ALIGNED_ACCESS_REQUIRED)
  /*
   * Host byte order already matches.  memcpy replaces the store through a
   * cast pointer, which was undefined for misaligned or differently-typed
   * destinations.
   */
  memcpy( dst, &w, sizeof w );
#else
  /* Portable path: emit the lowest byte first. */
  uint8_t *p = ( uint8_t * )dst;
  p[0] = ( uint8_t )( w >>  0 );
  p[1] = ( uint8_t )( w >>  8 );
  p[2] = ( uint8_t )( w >> 16 );
  p[3] = ( uint8_t )( w >> 24 );
  p[4] = ( uint8_t )( w >> 32 );
  p[5] = ( uint8_t )( w >> 40 );
  p[6] = ( uint8_t )( w >> 48 );
  p[7] = ( uint8_t )( w >> 56 );
#endif
}
|
|
|
|
|
|
|
|
/*
 * Read a 48-bit little-endian value from the 6 bytes at src.
 *
 * Only 6 bytes are read; the upper 16 bits of the returned word are zero.
 * No alignment is required.
 */
static inline uint64_t load48( const void *src )
{
  const uint8_t *p = ( const uint8_t * )src;
  return ( ( uint64_t )p[0] <<  0 ) |
         ( ( uint64_t )p[1] <<  8 ) |
         ( ( uint64_t )p[2] << 16 ) |
         ( ( uint64_t )p[3] << 24 ) |
         ( ( uint64_t )p[4] << 32 ) |
         ( ( uint64_t )p[5] << 40 );
}
|
|
|
|
|
|
|
|
/*
 * Write the low 48 bits of w to the 6 bytes at dst, lowest byte first.
 *
 * Exactly 6 bytes are written; the upper 16 bits of w are discarded.
 * No alignment is required.
 */
static inline void store48( void *dst, uint64_t w )
{
  uint8_t *p = ( uint8_t * )dst;
  p[0] = ( uint8_t )( w >>  0 );
  p[1] = ( uint8_t )( w >>  8 );
  p[2] = ( uint8_t )( w >> 16 );
  p[3] = ( uint8_t )( w >> 24 );
  p[4] = ( uint8_t )( w >> 32 );
  p[5] = ( uint8_t )( w >> 40 );
}
|
|
|
|
|
|
|
|
/*
 * Rotate the 32-bit value w left by c bit positions.
 *
 * Both shift counts are reduced modulo 32, so c == 0 returns w unchanged
 * instead of shifting by the full type width (undefined behavior in C), and
 * counts of 32 or more wrap around.
 */
static inline uint32_t rotl32( const uint32_t w, const unsigned c )
{
  const unsigned up   = c & 31;
  const unsigned down = ( 32 - c ) & 31;
  return ( w << up ) | ( w >> down );
}
|
|
|
|
|
|
|
|
/*
 * Rotate the 64-bit value w left by c bit positions.
 *
 * Both shift counts are reduced modulo 64, so c == 0 returns w unchanged
 * instead of shifting by the full type width (undefined behavior in C), and
 * counts of 64 or more wrap around.
 */
static inline uint64_t rotl64( const uint64_t w, const unsigned c )
{
  const unsigned up   = c & 63;
  const unsigned down = ( 64 - c ) & 63;
  return ( w << up ) | ( w >> down );
}
|
|
|
|
|
|
|
|
/*
 * Rotate the 32-bit value w right by c bit positions.
 *
 * Both shift counts are reduced modulo 32, so c == 0 returns w unchanged
 * instead of shifting by the full type width (undefined behavior in C), and
 * counts of 32 or more wrap around.
 */
static inline uint32_t rotr32( const uint32_t w, const unsigned c )
{
  const unsigned down = c & 31;
  const unsigned up   = ( 32 - c ) & 31;
  return ( w >> down ) | ( w << up );
}
|
|
|
|
|
|
|
|
/*
 * Rotate the 64-bit value w right by c bit positions.
 *
 * Both shift counts are reduced modulo 64, so c == 0 returns w unchanged
 * instead of shifting by the full type width (undefined behavior in C), and
 * counts of 64 or more wrap around.
 */
static inline uint64_t rotr64( const uint64_t w, const unsigned c )
{
  const unsigned down = c & 63;
  const unsigned up   = ( 64 - c ) & 63;
  return ( w >> down ) | ( w << up );
}
|
|
|
|
|
|
|
|
/*
 * Zero n bytes starting at v in a way the compiler must not optimize away,
 * unlike a plain memset() on memory that is not read again.
 *
 * The zeroing primitive is chosen at compile time, in this order:
 *   1. SecureZeroMemory()  when building for Windows,
 *   2. memset_s()          when HAVE_MEMSET_S is defined (the general C11 call),
 *   3. explicit_bzero()    when HAVE_EXPLICIT_BZERO is defined,
 *   4. explicit_memset()   when HAVE_EXPLICIT_MEMSET is defined,
 *   5. memset() followed by an empty asm statement otherwise.
 * The HAVE_* macros are presumably supplied by config.h (TODO confirm).
 */
static inline void secure_zero_memory(void *v, size_t n)
{
#if defined(_WIN32) || defined(WIN32)
  SecureZeroMemory(v, n);
#elif defined(HAVE_MEMSET_S)
  memset_s(v, n, 0, n);
#elif defined(HAVE_EXPLICIT_BZERO)
  explicit_bzero(v, n);
#elif defined(HAVE_EXPLICIT_MEMSET)
  explicit_memset(v, 0, n);
#else
  memset(v, 0, n);
  /*
   * Empty asm that takes v as an input and clobbers "memory": the compiler
   * has to assume the buffer is inspected here, so the memset above cannot
   * be discarded as a dead store.
   */
  __asm__ __volatile__("" :: "r"(v) : "memory");
#endif
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|