Import kernel WireGuard support

Data path largely shared with the OpenBSD implementation by
Matt Dunwoodie <ncon@noconroy.net>

Reviewed by:	grehan@freebsd.org
MFC after:	1 month
Sponsored by:	Rubicon LLC (Netgate)
Differential Revision:	https://reviews.freebsd.org/D26137
Matt Macy 2020-11-29 19:38:03 +00:00
parent baa2cd58a6
commit 2338da0373
58 changed files with 45704 additions and 1 deletion


@@ -35,6 +35,7 @@ SRCS+= ifvxlan.c # VXLAN support
SRCS+= ifgre.c # GRE keys etc
SRCS+= ifgif.c # GIF reversed header workaround
SRCS+= ifipsec.c # IPsec VTI
SRCS+= ifwg.c # WireGuard
SRCS+= sfp.c # SFP/SFP+ information
LIBADD+= ifconfig m util
@@ -68,6 +69,7 @@ CFLAGS+= -DINET
CFLAGS+= -DJAIL
LIBADD+= jail
.endif
LIBADD+= nv
MAN= ifconfig.8

sbin/ifconfig/ifwg.c (new file)

@@ -0,0 +1,618 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 Rubicon Communications, LLC (Netgate)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#ifndef RESCUE
#include <sys/param.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/nv.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_media.h>
#include <net/route.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <assert.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <netdb.h>
#include <string.h>
#include <unistd.h>
#include <stdarg.h>
#include <stddef.h> /* NB: for offsetof */
#include <locale.h>
#include <langinfo.h>
#include <resolv.h>
#include "ifconfig.h"
typedef enum {
WGC_GET = 0x5,
WGC_SET = 0x6,
} wg_cmd_t;
static nvlist_t *nvl_params;
static bool do_peer;
static int allowed_ips_count;
static int allowed_ips_max;
struct allowedip {
struct sockaddr_storage a_addr;
struct sockaddr_storage a_mask;
};
struct allowedip *allowed_ips;
#define ALLOWEDIPS_START 16
#define WG_KEY_LEN 32
#define WG_KEY_LEN_BASE64 ((((WG_KEY_LEN) + 2) / 3) * 4 + 1)
#define WG_KEY_LEN_HEX (WG_KEY_LEN * 2 + 1)
#define WG_MAX_STRLEN 64
static bool
key_from_base64(uint8_t key[static WG_KEY_LEN], const char *base64)
{
if (strlen(base64) != WG_KEY_LEN_BASE64 - 1) {
warnx("bad key len - need %d got %zu\n", WG_KEY_LEN_BASE64 - 1, strlen(base64));
return false;
}
if (base64[WG_KEY_LEN_BASE64 - 2] != '=') {
warnx("bad key terminator, expected '=' got '%c'", base64[WG_KEY_LEN_BASE64 - 2]);
return false;
}
/* b64_pton returns -1 on error, so compare against the expected length. */
return (b64_pton(base64, key, WG_KEY_LEN) == WG_KEY_LEN);
}
static void
parse_endpoint(const char *endpoint_)
{
int err;
char *base, *endpoint, *port, *colon, *tmp;
struct addrinfo hints, *res;
endpoint = base = strdup(endpoint_);
colon = rindex(endpoint, ':');
if (colon == NULL)
errx(1, "bad endpoint format %s - no port delimiter found", endpoint);
*colon = '\0';
port = colon + 1;
/* [::]:<> */
if (endpoint[0] == '[') {
endpoint++;
tmp = index(endpoint, ']');
if (tmp == NULL)
errx(1, "bad endpoint format %s - '[' found with no matching ']'", endpoint);
*tmp = '\0';
}
bzero(&hints, sizeof(hints));
hints.ai_family = AF_UNSPEC;
err = getaddrinfo(endpoint, port, &hints, &res);
if (err)
errx(1, "%s", gai_strerror(err));
nvlist_add_binary(nvl_params, "endpoint", res->ai_addr, res->ai_addrlen);
freeaddrinfo(res);
free(base);
}
static void
in_len2mask(struct in_addr *mask, u_int len)
{
u_int i;
u_char *p;
p = (u_char *)mask;
memset(mask, 0, sizeof(*mask));
for (i = 0; i < len / NBBY; i++)
p[i] = 0xff;
if (len % NBBY)
p[i] = (0xff00 >> (len % NBBY)) & 0xff;
}
static u_int
in_mask2len(struct in_addr *mask)
{
u_int x, y;
u_char *p;
p = (u_char *)mask;
for (x = 0; x < sizeof(*mask); x++) {
if (p[x] != 0xff)
break;
}
y = 0;
if (x < sizeof(*mask)) {
for (y = 0; y < NBBY; y++) {
if ((p[x] & (0x80 >> y)) == 0)
break;
}
}
return x * NBBY + y;
}
static void
in6_prefixlen2mask(struct in6_addr *maskp, int len)
{
static const u_char maskarray[NBBY] = {0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff};
int bytelen, bitlen, i;
/* sanity check */
if (len < 0 || len > 128)
errx(1, "in6_prefixlen2mask: invalid prefix length (%d)", len);
memset(maskp, 0, sizeof(*maskp));
bytelen = len / NBBY;
bitlen = len % NBBY;
for (i = 0; i < bytelen; i++)
maskp->s6_addr[i] = 0xff;
if (bitlen)
maskp->s6_addr[bytelen] = maskarray[bitlen - 1];
}
static int
in6_mask2len(struct in6_addr *mask, u_char *lim0)
{
int x = 0, y;
u_char *lim = lim0, *p;
/* ignore the scope_id part */
if (lim0 == NULL || lim0 - (u_char *)mask > sizeof(*mask))
lim = (u_char *)mask + sizeof(*mask);
for (p = (u_char *)mask; p < lim; x++, p++) {
if (*p != 0xff)
break;
}
y = 0;
if (p < lim) {
for (y = 0; y < NBBY; y++) {
if ((*p & (0x80 >> y)) == 0)
break;
}
}
/*
* when the limit pointer is given, do a stricter check on the
* remaining bits.
*/
if (p < lim) {
if (y != 0 && (*p & (0x00ff >> y)) != 0)
return -1;
for (p = p + 1; p < lim; p++)
if (*p != 0)
return -1;
}
return x * NBBY + y;
}
static bool
parse_ip(struct allowedip *aip, const char *value)
{
struct addrinfo hints, *res;
int err;
bzero(&aip->a_addr, sizeof(aip->a_addr));
bzero(&hints, sizeof(hints));
hints.ai_family = AF_UNSPEC;
hints.ai_flags = AI_NUMERICHOST;
err = getaddrinfo(value, NULL, &hints, &res);
if (err)
errx(1, "%s", gai_strerror(err));
memcpy(&aip->a_addr, res->ai_addr, res->ai_addrlen);
freeaddrinfo(res);
return (true);
}
static void
sa_ntop(const struct sockaddr *sa, char *buf, int *port)
{
const struct sockaddr_in *sin;
const struct sockaddr_in6 *sin6;
int err;
err = getnameinfo(sa, sa->sa_len, buf, INET6_ADDRSTRLEN, NULL,
0, NI_NUMERICHOST);
if (sa->sa_family == AF_INET) {
sin = (const struct sockaddr_in *)sa;
if (port)
*port = sin->sin_port;
} else if (sa->sa_family == AF_INET6) {
sin6 = (const struct sockaddr_in6 *)sa;
if (port)
*port = sin6->sin6_port;
}
if (err)
errx(1, "%s", gai_strerror(err));
}
static void
dump_peer(const nvlist_t *nvl_peer)
{
const void *key;
const struct allowedip *aips;
const struct sockaddr *endpoint;
char outbuf[WG_MAX_STRLEN];
char addr_buf[INET6_ADDRSTRLEN];
size_t size;
int count, port;
printf("[Peer]\n");
if (nvlist_exists_binary(nvl_peer, "public-key")) {
key = nvlist_get_binary(nvl_peer, "public-key", &size);
b64_ntop((const uint8_t *)key, size, outbuf, WG_MAX_STRLEN);
printf("PublicKey = %s\n", outbuf);
}
if (nvlist_exists_binary(nvl_peer, "endpoint")) {
endpoint = nvlist_get_binary(nvl_peer, "endpoint", &size);
sa_ntop(endpoint, addr_buf, &port);
printf("Endpoint = %s:%d\n", addr_buf, ntohs(port));
}
if (!nvlist_exists_binary(nvl_peer, "allowed-ips"))
return;
aips = nvlist_get_binary(nvl_peer, "allowed-ips", &size);
if (size == 0 || size % sizeof(struct allowedip) != 0) {
errx(1, "size %zu not integer multiple of allowedip", size);
}
printf("AllowedIPs = ");
count = size / sizeof(struct allowedip);
for (int i = 0; i < count; i++) {
int mask;
sa_family_t family;
void *bitmask;
struct sockaddr *sa;
sa = __DECONST(void *, &aips[i].a_addr);
bitmask = __DECONST(void *,
((const struct sockaddr *)&aips[i].a_mask)->sa_data);
family = aips[i].a_addr.ss_family;
getnameinfo(sa, sa->sa_len, addr_buf, INET6_ADDRSTRLEN, NULL,
0, NI_NUMERICHOST);
if (family == AF_INET)
mask = in_mask2len(bitmask);
else if (family == AF_INET6)
mask = in6_mask2len(bitmask, NULL);
else
errx(1, "bad family in peer %d\n", family);
printf("%s/%d", addr_buf, mask);
if (i < count - 1)
printf(", ");
}
printf("\n");
}
static int
get_nvl_out_size(int sock, u_long op, size_t *size)
{
struct ifdrv ifd;
int err;
memset(&ifd, 0, sizeof(ifd));
strlcpy(ifd.ifd_name, name, sizeof(ifd.ifd_name));
ifd.ifd_cmd = op;
ifd.ifd_len = 0;
ifd.ifd_data = NULL;
err = ioctl(sock, SIOCGDRVSPEC, &ifd);
if (err)
return (err);
*size = ifd.ifd_len;
return (0);
}
static int
do_cmd(int sock, u_long op, void *arg, size_t argsize, int set)
{
struct ifdrv ifd;
memset(&ifd, 0, sizeof(ifd));
strlcpy(ifd.ifd_name, name, sizeof(ifd.ifd_name));
ifd.ifd_cmd = op;
ifd.ifd_len = argsize;
ifd.ifd_data = arg;
return (ioctl(sock, set ? SIOCSDRVSPEC : SIOCGDRVSPEC, &ifd));
}
static
DECL_CMD_FUNC(peerlist, val, d)
{
size_t size, peercount;
void *packed;
const nvlist_t *nvl, *nvl_peer;
const nvlist_t *const *nvl_peerlist;
if (get_nvl_out_size(s, WGC_GET, &size))
errx(1, "can't get peer list size");
if ((packed = malloc(size)) == NULL)
errx(1, "malloc failed for peer list");
if (do_cmd(s, WGC_GET, packed, size, 0))
errx(1, "failed to obtain peer list");
nvl = nvlist_unpack(packed, size, 0);
if (!nvlist_exists_nvlist_array(nvl, "peer-list"))
return;
nvl_peerlist = nvlist_get_nvlist_array(nvl, "peer-list", &peercount);
for (int i = 0; i < peercount; i++, nvl_peerlist++) {
nvl_peer = *nvl_peerlist;
dump_peer(nvl_peer);
}
}
static void
peerfinish(int s, void *arg)
{
nvlist_t *nvl, **nvl_array;
void *packed;
size_t size;
if ((nvl = nvlist_create(0)) == NULL)
errx(1, "failed to allocate nvlist");
if ((nvl_array = calloc(sizeof(void *), 1)) == NULL)
errx(1, "failed to allocate nvl_array");
if (!nvlist_exists_binary(nvl_params, "public-key"))
errx(1, "must specify a public-key for adding peer");
if (!nvlist_exists_binary(nvl_params, "endpoint"))
errx(1, "must specify an endpoint for adding peer");
if (allowed_ips_count == 0)
errx(1, "must specify at least one range of allowed-ips to add a peer");
nvl_array[0] = nvl_params;
nvlist_add_nvlist_array(nvl, "peer-list", (const nvlist_t * const *)nvl_array, 1);
packed = nvlist_pack(nvl, &size);
if (packed == NULL)
errx(1, "failed to pack peer request");
if (do_cmd(s, WGC_SET, packed, size, true))
errx(1, "failed to install peer");
}
static
DECL_CMD_FUNC(peerstart, val, d)
{
do_peer = true;
callback_register(peerfinish, NULL);
allowed_ips = malloc(ALLOWEDIPS_START * sizeof(struct allowedip));
allowed_ips_max = ALLOWEDIPS_START;
if (allowed_ips == NULL)
errx(1, "failed to allocate array for allowedips");
}
static
DECL_CMD_FUNC(setwglistenport, val, d)
{
struct addrinfo hints, *res;
const struct sockaddr_in *sin;
const struct sockaddr_in6 *sin6;
u_long ul;
int err;
bzero(&hints, sizeof(hints));
hints.ai_family = AF_UNSPEC;
hints.ai_flags = AI_NUMERICHOST;
err = getaddrinfo(NULL, val, &hints, &res);
if (err)
errx(1, "%s", gai_strerror(err));
if (res->ai_family == AF_INET) {
sin = (struct sockaddr_in *)res->ai_addr;
ul = sin->sin_port;
} else if (res->ai_family == AF_INET6) {
sin6 = (struct sockaddr_in6 *)res->ai_addr;
ul = sin6->sin6_port;
} else {
errx(1, "unknown family");
}
ul = ntohs((u_short)ul);
nvlist_add_number(nvl_params, "listen-port", ul);
}
static
DECL_CMD_FUNC(setwgprivkey, val, d)
{
uint8_t key[WG_KEY_LEN];
if (!key_from_base64(key, val))
errx(1, "invalid key %s", val);
nvlist_add_binary(nvl_params, "private-key", key, WG_KEY_LEN);
}
static
DECL_CMD_FUNC(setwgpubkey, val, d)
{
uint8_t key[WG_KEY_LEN];
if (!do_peer)
errx(1, "setting public key only valid when adding peer");
if (!key_from_base64(key, val))
errx(1, "invalid key %s", val);
nvlist_add_binary(nvl_params, "public-key", key, WG_KEY_LEN);
}
static
DECL_CMD_FUNC(setallowedips, val, d)
{
char *base, *allowedip, *mask;
u_long ul;
char *endp;
struct allowedip *aip;
if (!do_peer)
errx(1, "setting allowed ip only valid when adding peer");
if (allowed_ips_count == allowed_ips_max) {
allowed_ips_max *= 2;
allowed_ips = reallocarray(allowed_ips, allowed_ips_max,
sizeof(*allowed_ips));
if (allowed_ips == NULL)
errx(1, "failed to grow allowed-ips array");
}
aip = &allowed_ips[allowed_ips_count];
base = allowedip = strdup(val);
mask = index(allowedip, '/');
if (mask == NULL)
errx(1, "mask separator not found in allowedip %s", val);
*mask = '\0';
mask++;
parse_ip(aip, allowedip);
ul = strtoul(mask, &endp, 0);
if (*endp != '\0')
errx(1, "invalid value for allowedip mask");
bzero(&aip->a_mask, sizeof(aip->a_mask));
if (aip->a_addr.ss_family == AF_INET)
in_len2mask((struct in_addr *)&((struct sockaddr *)&aip->a_mask)->sa_data, ul);
else if (aip->a_addr.ss_family == AF_INET6)
in6_prefixlen2mask((struct in6_addr *)&((struct sockaddr *)&aip->a_mask)->sa_data, ul);
else
errx(1, "invalid address family %d", aip->a_addr.ss_family);
allowed_ips_count++;
if (allowed_ips_count > 1)
nvlist_free_binary(nvl_params, "allowed-ips");
nvlist_add_binary(nvl_params, "allowed-ips", allowed_ips,
allowed_ips_count*sizeof(*aip));
dump_peer(nvl_params);
free(base);
}
static
DECL_CMD_FUNC(setendpoint, val, d)
{
if (!do_peer)
errx(1, "setting endpoint only valid when adding peer");
parse_endpoint(val);
}
static void
wireguard_status(int s)
{
size_t size;
void *packed;
nvlist_t *nvl;
char buf[WG_KEY_LEN_BASE64];
const void *key;
uint16_t listen_port;
if (get_nvl_out_size(s, WGC_GET, &size))
return;
if ((packed = malloc(size)) == NULL)
return;
if (do_cmd(s, WGC_GET, packed, size, 0))
return;
nvl = nvlist_unpack(packed, size, 0);
if (nvlist_exists_number(nvl, "listen-port")) {
listen_port = nvlist_get_number(nvl, "listen-port");
printf("\tlisten-port: %d\n", listen_port);
}
if (nvlist_exists_binary(nvl, "private-key")) {
key = nvlist_get_binary(nvl, "private-key", &size);
b64_ntop((const uint8_t *)key, size, buf, sizeof(buf));
printf("\tprivate-key: %s\n", buf);
}
if (nvlist_exists_binary(nvl, "public-key")) {
key = nvlist_get_binary(nvl, "public-key", &size);
b64_ntop((const uint8_t *)key, size, buf, sizeof(buf));
printf("\tpublic-key: %s\n", buf);
}
}
static struct cmd wireguard_cmds[] = {
DEF_CLONE_CMD_ARG("listen-port", setwglistenport),
DEF_CLONE_CMD_ARG("private-key", setwgprivkey),
DEF_CMD("peer-list", 0, peerlist),
DEF_CMD("peer", 0, peerstart),
DEF_CMD_ARG("public-key", setwgpubkey),
DEF_CMD_ARG("allowed-ips", setallowedips),
DEF_CMD_ARG("endpoint", setendpoint),
};
static struct afswtch af_wireguard = {
.af_name = "af_wireguard",
.af_af = AF_UNSPEC,
.af_other_status = wireguard_status,
};
static void
wg_create(int s, struct ifreq *ifr)
{
struct iovec iov;
void *packed;
size_t size;
setproctitle("ifconfig %s create ...", name);
if (!nvlist_exists_number(nvl_params, "listen-port"))
goto legacy;
if (!nvlist_exists_binary(nvl_params, "private-key"))
goto legacy;
packed = nvlist_pack(nvl_params, &size);
if (packed == NULL)
errx(1, "failed to setup create request");
iov.iov_len = size;
iov.iov_base = packed;
ifr->ifr_data = (caddr_t)&iov;
if (ioctl(s, SIOCIFCREATE2, ifr) < 0)
err(1, "SIOCIFCREATE2");
return;
legacy:
ifr->ifr_data = NULL;
if (ioctl(s, SIOCIFCREATE, ifr) < 0)
err(1, "SIOCIFCREATE");
}
static __constructor void
wireguard_ctor(void)
{
int i;
nvl_params = nvlist_create(0);
for (i = 0; i < nitems(wireguard_cmds); i++)
cmd_register(&wireguard_cmds[i]);
af_register(&af_wireguard);
clone_setdefcallback_prefix("wg", wg_create);
}
#endif
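
The commands registered in wireguard_cmds above map directly onto
ifconfig(8) arguments. A hypothetical invocation, assuming the if_wg
module is loaded and with placeholder base64 keys:

    ifconfig wg0 create listen-port 51820 private-key '<base64 key>'
    ifconfig wg0 peer public-key '<base64 key>' \
        endpoint 10.0.0.1:51820 allowed-ips 192.168.2.0/24
    ifconfig wg0 peer-list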


@@ -0,0 +1,56 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include <sys/types.h>
#ifndef _BLAKE2S_H_
#define _BLAKE2S_H_
enum blake2s_lengths {
BLAKE2S_BLOCK_SIZE = 64,
BLAKE2S_HASH_SIZE = 32,
BLAKE2S_KEY_SIZE = 32
};
struct blake2s_state {
uint32_t h[8];
uint32_t t[2];
uint32_t f[2];
uint8_t buf[BLAKE2S_BLOCK_SIZE];
size_t buflen;
uint8_t last_node;
};
void blake2s_init(struct blake2s_state *state, const size_t outlen);
void blake2s_init_key(struct blake2s_state *state, const size_t outlen,
const void *key, const size_t keylen);
void blake2s_update(struct blake2s_state *state, const uint8_t *in, size_t inlen);
void blake2s_final(struct blake2s_state *state, uint8_t *out, const size_t outlen);
static inline void blake2s(uint8_t *out, const uint8_t *in, const uint8_t *key,
const size_t outlen, const size_t inlen,
const size_t keylen)
{
struct blake2s_state state;
#ifdef __linux__
WARN_ON(IS_ENABLED(DEBUG) && ((!in && inlen > 0) || !out || !outlen ||
outlen > BLAKE2S_HASH_SIZE || keylen > BLAKE2S_KEY_SIZE ||
(!key && keylen)));
#endif
if (keylen)
blake2s_init_key(&state, outlen, key, keylen);
else
blake2s_init(&state, outlen);
blake2s_update(&state, in, inlen);
blake2s_final(&state, out, outlen);
}
void blake2s_hmac(uint8_t *out, const uint8_t *in, const uint8_t *key,
const size_t outlen, const size_t inlen, const size_t keylen);
#endif /* _BLAKE2S_H_ */


@@ -0,0 +1,74 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2019-2020 Rubicon Communications, LLC (Netgate)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _CURVE25519_H_
#define _CURVE25519_H_
#include <sys/systm.h>
#define CURVE25519_KEY_SIZE 32
void curve25519_generic(u8 [CURVE25519_KEY_SIZE],
const u8 [CURVE25519_KEY_SIZE],
const u8 [CURVE25519_KEY_SIZE]);
static inline void curve25519_clamp_secret(u8 secret[CURVE25519_KEY_SIZE])
{
secret[0] &= 248;
secret[31] = (secret[31] & 127) | 64;
}
static const u8 null_point[CURVE25519_KEY_SIZE] = { 0 };
static inline int curve25519(u8 mypublic[CURVE25519_KEY_SIZE],
const u8 secret[CURVE25519_KEY_SIZE],
const u8 basepoint[CURVE25519_KEY_SIZE])
{
curve25519_generic(mypublic, secret, basepoint);
return timingsafe_bcmp(mypublic, null_point, CURVE25519_KEY_SIZE);
}
static inline int curve25519_generate_public(u8 pub[CURVE25519_KEY_SIZE],
const u8 secret[CURVE25519_KEY_SIZE])
{
static const u8 basepoint[CURVE25519_KEY_SIZE] __aligned(32) = { 9 };
if (timingsafe_bcmp(secret, null_point, CURVE25519_KEY_SIZE) == 0)
return 0;
return curve25519(pub, secret, basepoint);
}
static inline void curve25519_generate_secret(u8 secret[CURVE25519_KEY_SIZE])
{
arc4random_buf(secret, CURVE25519_KEY_SIZE);
curve25519_clamp_secret(secret);
}
#endif /* _CURVE25519_H_ */
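
A minimal usage sketch (illustrative only, not part of the commit; the
function name is hypothetical): generating a local keypair with the
inlines above.

static inline int
curve25519_example_keypair(u8 pub[CURVE25519_KEY_SIZE],
    u8 secret[CURVE25519_KEY_SIZE])
{
	/* Fill with CSPRNG output, then clamp into a valid scalar. */
	curve25519_generate_secret(secret);
	/* Nonzero on success, 0 if the public key is the all-zero point. */
	return (curve25519_generate_public(pub, secret));
}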


@@ -0,0 +1,15 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _WG_ZINC_H
#define _WG_ZINC_H
int chacha20_mod_init(void);
int poly1305_mod_init(void);
int chacha20poly1305_mod_init(void);
int blake2s_mod_init(void);
int curve25519_mod_init(void);
#endif


@@ -0,0 +1,89 @@
/*
* Copyright (c) 2019 Matt Dunwoodie <ncon@noconroy.net>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* $FreeBSD$
*/
#ifndef __IF_WG_H__
#define __IF_WG_H__
#include <net/if.h>
#include <netinet/in.h>
/*
* This is the public interface to the WireGuard network interface.
*
* It is designed to be used by tools such as ifconfig(8) and wg(4).
*/
#define WG_KEY_SIZE 32
#define WG_DEVICE_HAS_PUBKEY (1 << 0)
#define WG_DEVICE_HAS_PRIVKEY (1 << 1)
#define WG_DEVICE_HAS_MASKED_PRIVKEY (1 << 2)
#define WG_DEVICE_HAS_PORT (1 << 3)
#define WG_DEVICE_HAS_RDOMAIN (1 << 4)
#define WG_DEVICE_REPLACE_PEERS (1 << 5)
#define WG_PEER_HAS_PUBKEY (1 << 0)
#define WG_PEER_HAS_SHAREDKEY (1 << 1)
#define WG_PEER_HAS_MASKED_SHAREDKEY (1 << 2)
#define WG_PEER_HAS_ENDPOINT (1 << 3)
#define WG_PEER_HAS_PERSISTENTKEEPALIVE (1 << 4)
#define WG_PEER_REPLACE_CIDRS (1 << 5)
#define WG_PEER_REMOVE (1 << 6)
#define SIOCSWG _IOWR('i', 200, struct wg_device_io)
#define SIOCGWG _IOWR('i', 201, struct wg_device_io)
#define WG_PEERS_FOREACH(p, d) \
for (p = (d)->d_peers; p < (d)->d_peers + (d)->d_num_peers; p++)
#define WG_CIDRS_FOREACH(c, p) \
for (c = (p)->p_cidrs; c < (p)->p_cidrs + (p)->p_num_cidrs; c++)
struct wg_allowedip {
struct sockaddr_storage a_addr;
struct sockaddr_storage a_mask;
};
enum {
WG_PEER_CTR_TX_BYTES,
WG_PEER_CTR_RX_BYTES,
WG_PEER_CTR_NUM,
};
struct wg_device_io {
char d_name[IFNAMSIZ];
uint8_t d_flags;
in_port_t d_port;
int d_rdomain;
uint8_t d_pubkey[WG_KEY_SIZE];
uint8_t d_privkey[WG_KEY_SIZE];
size_t d_num_peers;
size_t d_num_cidrs;
struct wg_peer_io *d_peers;
};
#ifndef ENOKEY
#define ENOKEY ENOTCAPABLE
#endif
typedef enum {
WGC_GET = 0x5,
WGC_SET = 0x6,
} wg_cmd_t;
#endif /* __IF_WG_H__ */
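
An illustrative sketch (not from the commit) of the iteration macros above.
struct wg_peer_io is only forward-declared in this header; the sketch
assumes a complete definition with the p_cidrs array of struct wg_allowedip
entries and the p_num_cidrs count that WG_CIDRS_FOREACH implies.

static inline size_t
wg_device_count_cidrs(const struct wg_device_io *dev)
{
	struct wg_peer_io *p;
	struct wg_allowedip *c;
	size_t n = 0;

	WG_PEERS_FOREACH(p, dev)	/* walks d_peers[0 .. d_num_peers) */
		WG_CIDRS_FOREACH(c, p)	/* walks p_cidrs[0 .. p_num_cidrs) */
			n++;
	return (n);
}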


@@ -0,0 +1,322 @@
/*
* Copyright (c) 2019 Matt Dunwoodie <ncon@noconroy.net>
* Copyright (c) 2019-2020 Rubicon Communications, LLC (Netgate)
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* $FreeBSD$
*/
#ifndef _IF_WG_VARS_H_
#define _IF_WG_VARS_H_
#include <sys/types.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <crypto/siphash/siphash.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/ethernet.h>
#include <net/pfvar.h>
#include <net/iflib.h>
#include <sys/wg_noise.h>
#include <sys/wg_cookie.h>
/* This is only needed for wg_keypair. */
#include <sys/if_wg_session.h>
#define UNIMPLEMENTED() panic("%s not implemented\n", __func__)
#define WG_KEY_SIZE 32
#define WG_MSG_PADDING_SIZE 16
/* Constant for session */
#define REKEY_TIMEOUT 5
#define REKEY_TIMEOUT_JITTER 500 /* TODO ok? jason */
#define REJECT_AFTER_TIME 180
#define KEEPALIVE_TIMEOUT 10
#define MAX_TIMER_HANDSHAKES (90 / REKEY_TIMEOUT)
#define NEW_HANDSHAKE_TIMEOUT (REKEY_TIMEOUT + KEEPALIVE_TIMEOUT)
#define MAX_QUEUED_INCOMING_HANDSHAKES 4096 /* TODO: replace this with DQL */
#define MAX_QUEUED_PACKETS 1024 /* TODO: replace this with DQL */
#define HASHTABLE_PEER_SIZE (1 << 6) //1 << 11
#define HASHTABLE_INDEX_SIZE (HASHTABLE_PEER_SIZE * 3) //1 << 13
#define PEER_MAGIC1 0xCAFEBABEB00FDADDULL
#define PEER_MAGIC2 0xCAAFD0D0D00DBABEULL
#define PEER_MAGIC3 0xD00DBABEF00DFADEULL
enum message_type {
MESSAGE_INVALID = 0,
MESSAGE_HANDSHAKE_INITIATION = 1,
MESSAGE_HANDSHAKE_RESPONSE = 2,
MESSAGE_HANDSHAKE_COOKIE = 3,
MESSAGE_DATA = 4
};
struct wg_softc;
#if __FreeBSD_version > 1300000
typedef void timeout_t (void *);
#endif
/* Socket */
struct wg_endpoint {
union wg_remote {
struct sockaddr r_sa;
struct sockaddr_in r_sin;
struct sockaddr_in6 r_sin6;
} e_remote;
union wg_source {
struct in_addr l_in;
struct in6_pktinfo l_pktinfo6;
#define l_in6 l_pktinfo6.ipi6_addr
} e_local;
};
struct wg_socket {
struct mtx so_mtx;
in_port_t so_port;
struct socket *so_so4;
struct socket *so_so6;
};
struct wg_queue {
struct mtx q_mtx;
struct mbufq q;
};
struct wg_index {
LIST_ENTRY(wg_index) i_entry;
SLIST_ENTRY(wg_index) i_unused_entry;
uint32_t i_key;
struct noise_remote *i_value;
};
struct wg_timers {
/* t_lock is for blocking wg_timers_event_* when setting t_disabled. */
struct rwlock t_lock;
int t_disabled;
int t_need_another_keepalive;
uint16_t t_persistent_keepalive_interval;
struct callout t_new_handshake;
struct callout t_send_keepalive;
struct callout t_retry_handshake;
struct callout t_zero_key_material;
struct callout t_persistent_keepalive;
struct mtx t_handshake_mtx;
struct timespec t_handshake_last_sent;
struct timespec t_handshake_complete;
volatile int t_handshake_retries;
};
struct wg_peer {
uint64_t p_magic_1;
CK_LIST_ENTRY(wg_peer) p_hash_entry;
CK_LIST_ENTRY(wg_peer) p_entry;
uint64_t p_id;
struct wg_softc *p_sc;
struct noise_remote p_remote;
struct cookie_maker p_cookie;
struct wg_timers p_timers;
struct rwlock p_endpoint_lock;
struct wg_endpoint p_endpoint;
uint64_t p_magic_2;
SLIST_HEAD(,wg_index) p_unused_index;
struct wg_index p_index[3];
struct wg_queue p_encap_queue;
struct wg_queue p_decap_queue;
struct grouptask p_clear_secrets;
struct grouptask p_send_initiation;
struct grouptask p_send_keepalive;
struct grouptask p_send;
struct grouptask p_recv;
counter_u64_t p_tx_bytes;
counter_u64_t p_rx_bytes;
CK_LIST_HEAD(, wg_route) p_routes;
uint64_t p_magic_3;
struct mtx p_lock;
struct epoch_context p_ctx;
};
/* Packet */
void wg_softc_decrypt(struct wg_softc *);
void wg_softc_encrypt(struct wg_softc *);
/* Queue */
void wg_queue_init(struct wg_queue *, const char *);
void wg_queue_deinit(struct wg_queue *);
/* Counter */
/* Timers */
/* Route */
enum route_direction {
IN,
OUT,
};
struct wg_route_table {
size_t t_count;
struct radix_node_head *t_ip;
struct radix_node_head *t_ip6;
};
struct wg_peer;
struct wg_route {
struct radix_node r_nodes[2];
struct wg_allowedip r_cidr;
CK_LIST_ENTRY(wg_route) r_entry;
struct wg_peer *r_peer;
};
int wg_route_add(struct wg_route_table *, struct wg_peer *,
const struct wg_allowedip *);
int wg_route_delete(struct wg_route_table *, struct wg_peer *);
/* Noise */
/* Peer */
struct wg_softc;
struct wg_hashtable {
struct mtx h_mtx;
SIPHASH_KEY h_secret;
CK_LIST_HEAD(, wg_peer) h_peers_list;
CK_LIST_HEAD(, wg_peer) *h_peers;
u_long h_peers_mask;
size_t h_num_peers;
LIST_HEAD(, noise_keypair) *h_keys;
u_long h_keys_mask;
size_t h_num_keys;
};
/* Softc */
struct wg_softc {
if_softc_ctx_t shared;
if_ctx_t wg_ctx;
struct ifnet *sc_ifp;
uint16_t sc_incoming_port;
uint32_t sc_user_cookie;
struct wg_socket sc_socket;
struct wg_hashtable sc_hashtable;
struct wg_route_table sc_routes;
struct mbufq sc_handshake_queue;
struct grouptask sc_handshake;
struct noise_local sc_local;
struct cookie_checker sc_cookie;
struct buf_ring *sc_encap_ring;
struct buf_ring *sc_decap_ring;
struct grouptask *sc_encrypt;
struct grouptask *sc_decrypt;
struct rwlock sc_index_lock;
LIST_HEAD(,wg_index) *sc_index;
u_long sc_index_mask;
struct mtx sc_mtx;
};
struct wg_tag {
struct m_tag wt_tag;
struct wg_endpoint t_endpoint;
struct wg_peer *t_peer;
struct mbuf *t_mbuf;
sa_family_t t_family;
int t_done;
int t_mtu;
};
int wg_route_add(struct wg_route_table *tbl, struct wg_peer *peer,
const struct wg_allowedip *cidr_);
struct wg_peer *wg_route_lookup(struct wg_route_table *, struct mbuf *,
enum route_direction);
void wg_peer_remove_all(struct wg_softc *);
struct wg_peer *wg_peer_alloc(struct wg_softc *);
void wg_peer_destroy(struct wg_peer *);
void wg_hashtable_init(struct wg_hashtable *);
void wg_hashtable_destroy(struct wg_hashtable *);
void wg_hashtable_peer_insert(struct wg_hashtable *, struct wg_peer *);
struct wg_peer *wg_peer_lookup(struct wg_softc *,
const uint8_t [WG_KEY_SIZE]);
void wg_hashtable_peer_remove(struct wg_hashtable *, struct wg_peer *);
int wg_queue_out(struct wg_peer *peer, struct mbuf *m);
int wg_route_init(struct wg_route_table *);
void wg_route_destroy(struct wg_route_table *);
int wg_socket_init(struct wg_softc *sc);
void wg_socket_reinit(struct wg_softc *, struct socket *so4,
struct socket *so6);
int wg_socket_close(struct wg_socket *so);
void wg_softc_handshake_receive(struct wg_softc *sc);
int wg_timers_get_persistent_keepalive(struct wg_timers *, uint16_t *);
void wg_timers_set_persistent_keepalive(struct wg_timers *t, uint16_t);
void wg_timers_get_last_handshake(struct wg_timers *, struct timespec *);
struct noise_remote *wg_remote_get(struct wg_softc *, uint8_t [NOISE_KEY_SIZE]);
uint32_t wg_index_set(struct wg_softc *, struct noise_remote *);
struct noise_remote *wg_index_get(struct wg_softc *, uint32_t);
void wg_index_drop(struct wg_softc *, uint32_t);
void wg_encrypt_dispatch(struct wg_softc *);
void wg_decrypt_dispatch(struct wg_softc *);
struct wg_tag *wg_tag_get(struct mbuf *m);
#endif /* _IF_WG_VARS_H_ */


@@ -0,0 +1,74 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2019-2020 Rubicon Communications, LLC (Netgate)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SIMD_X86_64_H_
#define _SIMD_X86_64_H_
#include <x86/x86_var.h>
#include <x86/specialreg.h>
static inline uint64_t
xgetbv(uint32_t index)
{
uint32_t eax, edx;
/* xgetbv - instruction byte code */
__asm__ __volatile__(".byte 0x0f; .byte 0x01; .byte 0xd0"
: "=a" (eax), "=d" (edx)
: "c" (index));
return ((((uint64_t)edx)<<32) | (uint64_t)eax);
}
/*
* Detect register set support
*/
static inline boolean_t
__simd_state_enabled(const uint64_t state)
{
boolean_t has_osxsave;
uint64_t xcr0;
has_osxsave = !!(cpu_feature2 & CPUID2_OSXSAVE);
if (!has_osxsave)
return (0);
xcr0 = xgetbv(0);
return ((xcr0 & state) == state);
}
#define _XSTATE_SSE_AVX (0x2 | 0x4)
#define _XSTATE_AVX512 (0xE0 | _XSTATE_SSE_AVX)
#define __ymm_enabled() __simd_state_enabled(_XSTATE_SSE_AVX)
#define __zmm_enabled() __simd_state_enabled(_XSTATE_AVX512)
#endif


@@ -0,0 +1,342 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2019-2020 Rubicon Communications, LLC (Netgate)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef SYS_SUPPORT_H_
#define SYS_SUPPORT_H_
#ifdef __LOCORE
#include <machine/asm.h>
#define SYM_FUNC_START ENTRY
#define SYM_FUNC_END END
#else
#include <sys/types.h>
#include <sys/limits.h>
#include <sys/endian.h>
#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <vm/uma.h>
#if defined(__aarch64__) || defined(__amd64__) || defined(__i386__)
#include <machine/fpu.h>
#endif
#include <crypto/siphash/siphash.h>
#define COMPAT_ZINC_IS_A_MODULE
MALLOC_DECLARE(M_WG);
#define BUILD_BUG_ON(x) CTASSERT(!(x))
#define BIT(nr) (1UL << (nr))
#define BIT_ULL(nr) (1ULL << (nr))
#ifdef __LP64__
#define BITS_PER_LONG 64
#else
#define BITS_PER_LONG 32
#endif
#define rw_enter_write rw_wlock
#define rw_exit_write rw_wunlock
#define rw_enter_read rw_rlock
#define rw_exit_read rw_runlock
#define rw_exit rw_unlock
#define ASSERT(x) MPASS(x)
#define ___PASTE(a,b) a##b
#define __PASTE(a,b) ___PASTE(a,b)
#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
#define typeof(x) __typeof__(x)
#define min_t(t, a, b) ({ t __a = (a); t __b = (b); __a > __b ? __b : __a; })
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint32_t __le32;
typedef uint64_t u64;
typedef uint64_t __le64;
#define __must_check __attribute__((__warn_unused_result__))
#define asmlinkage
#define __ro_after_init __read_mostly
#define get_unaligned_le32(x) le32dec(x)
#define get_unaligned_le64(x) le64dec(x)
#define cpu_to_le64(x) htole64(x)
#define cpu_to_le32(x) htole32(x)
#define letoh64(x) le64toh(x)
#define need_resched() \
((curthread->td_flags & (TDF_NEEDRESCHED|TDF_ASTPENDING)) || \
curthread->td_owepreempt)
#define CONTAINER_OF(a, b, c) __containerof((a), b, c)
typedef struct {
uint64_t k0;
uint64_t k1;
} SIPHASH_KEY;
static inline uint64_t
siphash24(const SIPHASH_KEY *key, const void *src, size_t len)
{
SIPHASH_CTX ctx;
return (SipHashX(&ctx, 2, 4, (const uint8_t *)key, src, len));
}
static inline void
put_unaligned_le32(u32 val, void *p)
{
*((__le32 *)p) = cpu_to_le32(val);
}
#define rol32(i32, n) ((i32) << (n) | (i32) >> (32 - (n)))
#define memzero_explicit(p, s) explicit_bzero(p, s)
#define EXPORT_SYMBOL(x)
#define U32_MAX ((u32)~0U)
#if defined(__aarch64__) || defined(__amd64__) || defined(__i386__)
#define kfpu_begin(ctx) { \
if (ctx->sc_fpu_ctx == NULL) { \
ctx->sc_fpu_ctx = fpu_kern_alloc_ctx(0); \
} \
critical_enter(); \
fpu_kern_enter(curthread, ctx->sc_fpu_ctx, FPU_KERN_NORMAL); \
}
#define kfpu_end(ctx) { \
MPASS(ctx->sc_fpu_ctx != NULL); \
fpu_kern_leave(curthread, ctx->sc_fpu_ctx); \
critical_exit(); \
}
#else
#define kfpu_begin(ctx)
#define kfpu_end(ctx)
#define fpu_kern_free_ctx(p)
#endif
typedef enum {
HAVE_NO_SIMD = 1 << 0,
HAVE_FULL_SIMD = 1 << 1,
HAVE_SIMD_IN_USE = 1 << 31
} simd_context_state_t;
typedef struct {
simd_context_state_t sc_state;
struct fpu_kern_ctx *sc_fpu_ctx;
} simd_context_t;
#define DONT_USE_SIMD NULL
static __must_check inline bool
may_use_simd(void)
{
#if defined(__amd64__)
return true;
#else
return false;
#endif
}
static inline void
simd_get(simd_context_t *ctx)
{
ctx->sc_state = may_use_simd() ? HAVE_FULL_SIMD : HAVE_NO_SIMD;
}
static inline void
simd_put(simd_context_t *ctx)
{
#if defined(__aarch64__) || defined(__amd64__) || defined(__i386__)
if (is_fpu_kern_thread(0))
return;
#endif
if (ctx->sc_state & HAVE_SIMD_IN_USE)
kfpu_end(ctx);
ctx->sc_state = HAVE_NO_SIMD;
}
static __must_check inline bool
simd_use(simd_context_t *ctx)
{
#if defined(__aarch64__) || defined(__amd64__) || defined(__i386__)
if (is_fpu_kern_thread(0))
return true;
#else
return false;
#endif
if (ctx == NULL)
return false;
if (!(ctx->sc_state & HAVE_FULL_SIMD))
return false;
if (ctx->sc_state & HAVE_SIMD_IN_USE)
return true;
kfpu_begin(ctx);
ctx->sc_state |= HAVE_SIMD_IN_USE;
return true;
}
static inline bool
simd_relax(simd_context_t *ctx)
{
if ((ctx->sc_state & HAVE_SIMD_IN_USE) && need_resched()) {
simd_put(ctx);
simd_get(ctx);
return simd_use(ctx);
}
return false;
}
#define unlikely(x) __predict_false(x)
#define likely(x) __predict_true(x)
/* Generic path for arbitrary size */
static inline unsigned long
__crypto_memneq_generic(const void *a, const void *b, size_t size)
{
unsigned long neq = 0;
while (size >= sizeof(unsigned long)) {
neq |= *(const unsigned long *)a ^ *(const unsigned long *)b;
__compiler_membar();
a = ((const char *)a + sizeof(unsigned long));
b = ((const char *)b + sizeof(unsigned long));
size -= sizeof(unsigned long);
}
while (size > 0) {
neq |= *(const unsigned char *)a ^ *(const unsigned char *)b;
__compiler_membar();
a = (const char *)a + 1;
b = (const char *)b + 1;
size -= 1;
}
return neq;
}
#define crypto_memneq(a, b, c) __crypto_memneq_generic((a), (b), (c))
static inline void
__cpu_to_le32s(uint32_t *buf)
{
*buf = htole32(*buf);
}
static inline void cpu_to_le32_array(u32 *buf, unsigned int words)
{
while (words--) {
__cpu_to_le32s(buf);
buf++;
}
}
#define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1
void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int len);
static inline void crypto_xor_cpy(u8 *dst, const u8 *src1, const u8 *src2,
unsigned int size)
{
if (CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS &&
__builtin_constant_p(size) &&
(size % sizeof(unsigned long)) == 0) {
unsigned long *d = (unsigned long *)dst;
const unsigned long *s1 = (const unsigned long *)src1;
const unsigned long *s2 = (const unsigned long *)src2;
while (size > 0) {
*d++ = *s1++ ^ *s2++;
size -= sizeof(unsigned long);
}
} else {
__crypto_xor(dst, src1, src2, size);
}
}
#include <sys/kernel.h>
#define module_init(fn) \
static void \
wrap_ ## fn(void *dummy __unused) \
{ \
fn(); \
} \
SYSINIT(wg_ ## fn, SI_SUB_LAST, SI_ORDER_FIRST, wrap_ ## fn, NULL)
#define module_exit(fn) \
static void \
wrap_ ## fn(void *dummy __unused) \
{ \
fn(); \
} \
SYSUNINIT(wg_ ## fn, SI_SUB_LAST, SI_ORDER_FIRST, wrap_ ## fn, NULL)
#define module_param(a, b, c)
#define MODULE_LICENSE(x)
#define MODULE_DESCRIPTION(x)
#define MODULE_AUTHOR(x)
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define __initconst
#define __initdata
#define __init
#define __exit
#define BUG() panic("%s:%d bug hit!\n", __FILE__, __LINE__)
#define WARN_ON(cond) ({ \
bool __ret = (cond); \
if (__ret) { \
printf("WARNING %s failed at %s:%d\n", \
__stringify(cond), __FILE__, __LINE__); \
} \
unlikely(__ret); \
})
#define pr_err printf
#define pr_info printf
#define IS_ENABLED(x) 0
#define ___stringify(...) #__VA_ARGS__
#define __stringify(...) ___stringify(__VA_ARGS__)
#define kmalloc(size, flag) malloc((size), M_WG, M_WAITOK)
#define kfree(p) free(p, M_WG)
#define vzalloc(size) malloc((size), M_WG, M_WAITOK|M_ZERO)
#define vfree(p) free(p, M_WG)
#endif
#endif


@@ -0,0 +1,174 @@
/*
* Copyright (C) 2015-2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
* Copyright (C) 2019-2020 Matt Dunwoodie <ncon@noconroy.net>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* ======== wg_cookie.h ========
*
* This file provides a thread safe interface to the WireGuard cookie
* mechanism. It is split into three parts:
*
* * cookie_maker
* Used to create MACs for messages.
* * cookie_checker
* Used to validate MACs for messages.
* * cookie_macs
* The MACs that authenticate the message.
*
* The MACs provide two properties:
* * mac1 - That the remote end knows a value.
* * mac2 - That the remote end has a specific IP address.
*
* void cookie_maker_init(cookie_maker, input)
* - Initialise cookie_maker, should only be called once and before use.
* input is the shared value used for mac1.
*
* int cookie_checker_init(cookie_checker, uma_zone)
* - Initialise cookie_checker, should only be called once and before use. It
* will return ENOBUFS if it cannot allocate required memory.
*
* void cookie_checker_update(cookie_checker, input)
* - Set the input value to check mac1 against.
*
* void cookie_checker_deinit(cookie_checker)
* - Destroy all values associated with cookie_checker. cookie_checker must
* not be used after calling this function.
*
* void cookie_checker_create_payload(cookie_checker, cookie_macs, nonce,
* payload, sockaddr)
* - Create a specific payload derived from the sockaddr. The payload is an
* encrypted shared secret that the cookie_maker will decrypt and use to
* key the mac2 value.
*
* int cookie_maker_consume_payload(cookie_maker, nonce, payload)
* - Have cookie_maker consume the payload.
*
* void cookie_maker_mac(cookie_maker, cookie_macs, message, len)
* - Create cookie_macs for the message of length len. It will always compute
* mac1, but will only compute mac2 if we have recently received a
* payload to key it with.
*
* int cookie_checker_validate_macs(cookie_checker, cookie_macs, message, len,
* busy, sockaddr)
* - Use cookie_checker to validate the cookie_macs of message with length
* len. If busy, then ratelimiting will be applied to the sockaddr.
*
* ==========================
* $FreeBSD$
*/
#ifndef __COOKIE_H__
#define __COOKIE_H__
#include <sys/types.h>
#include <sys/time.h>
#include <sys/rwlock.h>
#include <sys/queue.h>
#include <sys/support.h>
#include <netinet/in.h>
#include <crypto/blake2s.h>
#define COOKIE_MAC_SIZE 16
#define COOKIE_KEY_SIZE 32
#define COOKIE_XNONCE_SIZE 24
#define COOKIE_COOKIE_SIZE 16
#define COOKIE_SECRET_SIZE 32
#define COOKIE_INPUT_SIZE 32
#define COOKIE_ENCRYPTED_SIZE (COOKIE_COOKIE_SIZE + COOKIE_MAC_SIZE)
#define COOKIE_MAC1_KEY_LABEL "mac1----"
#define COOKIE_COOKIE_KEY_LABEL "cookie--"
#define COOKIE_SECRET_MAX_AGE 120
#define COOKIE_SECRET_LATENCY 5
/* Constants for initiation rate limiting */
#define RATELIMIT_SIZE (1 << 10)
#define RATELIMIT_SIZE_MAX (RATELIMIT_SIZE * 8)
#define NSEC_PER_SEC 1000000000LL
#define INITIATIONS_PER_SECOND 50
#define INITIATIONS_BURSTABLE 10
#define INITIATION_COST (NSEC_PER_SEC / INITIATIONS_PER_SECOND)
#define TOKEN_MAX (INITIATION_COST * INITIATIONS_BURSTABLE)
#define ELEMENT_TIMEOUT 1
#define IPV4_MASK_SIZE 4 /* Use all 4 bytes of IPv4 address */
#define IPV6_MASK_SIZE 8 /* Use top 8 bytes (/64) of IPv6 address */
struct cookie_macs {
uint8_t mac1[COOKIE_MAC_SIZE];
uint8_t mac2[COOKIE_MAC_SIZE];
} __packed;
struct ratelimit_entry {
LIST_ENTRY(ratelimit_entry) r_entry;
sa_family_t r_af;
union {
struct in_addr r_in;
struct in6_addr r_in6;
};
struct timespec r_last_time; /* nanouptime */
uint64_t r_tokens;
};
struct ratelimit {
SIPHASH_KEY rl_secret;
uma_zone_t rl_zone;
struct rwlock rl_lock;
LIST_HEAD(, ratelimit_entry) *rl_table;
u_long rl_table_mask;
size_t rl_table_num;
struct timespec rl_last_gc; /* nanouptime */
};
struct cookie_maker {
uint8_t cp_mac1_key[COOKIE_KEY_SIZE];
uint8_t cp_cookie_key[COOKIE_KEY_SIZE];
struct rwlock cp_lock;
uint8_t cp_cookie[COOKIE_COOKIE_SIZE];
struct timespec cp_birthdate; /* nanouptime */
int cp_mac1_valid;
uint8_t cp_mac1_last[COOKIE_MAC_SIZE];
};
struct cookie_checker {
struct ratelimit cc_ratelimit;
struct rwlock cc_key_lock;
uint8_t cc_mac1_key[COOKIE_KEY_SIZE];
uint8_t cc_cookie_key[COOKIE_KEY_SIZE];
struct rwlock cc_secret_lock;
struct timespec cc_secret_birthdate; /* nanouptime */
uint8_t cc_secret[COOKIE_SECRET_SIZE];
};
void cookie_maker_init(struct cookie_maker *, const uint8_t[COOKIE_INPUT_SIZE]);
int cookie_checker_init(struct cookie_checker *, uma_zone_t);
void cookie_checker_update(struct cookie_checker *,
uint8_t[COOKIE_INPUT_SIZE]);
void cookie_checker_deinit(struct cookie_checker *);
void cookie_checker_create_payload(struct cookie_checker *,
struct cookie_macs *cm, uint8_t[COOKIE_XNONCE_SIZE],
uint8_t [COOKIE_ENCRYPTED_SIZE], struct sockaddr *);
int cookie_maker_consume_payload(struct cookie_maker *,
uint8_t[COOKIE_XNONCE_SIZE], uint8_t[COOKIE_ENCRYPTED_SIZE]);
void cookie_maker_mac(struct cookie_maker *, struct cookie_macs *,
void *, size_t);
int cookie_checker_validate_macs(struct cookie_checker *,
struct cookie_macs *, void *, size_t, int, struct sockaddr *);
#endif /* __COOKIE_H__ */
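
A condensed sketch of the send/receive halves documented in the header
comment above (hypothetical call site, not from this commit;
cookie_maker_init() and cookie_checker_init() are assumed to have run):

static int
cookie_example(struct cookie_maker *cp, struct cookie_checker *cc,
    struct cookie_macs *cm, void *msg, size_t len, int busy,
    struct sockaddr *sa)
{
	/* Sender: always fills mac1; fills mac2 only if recently keyed. */
	cookie_maker_mac(cp, cm, msg, len);
	/* Receiver: `busy' additionally ratelimits by source address. */
	return (cookie_checker_validate_macs(cc, cm, msg, len, busy, sa));
}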


@@ -0,0 +1,123 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2019-2020 Rubicon Communications, LLC (Netgate)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef MODULE_H_
#define MODULE_H_
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_var.h>
#include <sys/support.h>
#include <sys/types.h>
#include <sys/epoch.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <crypto/curve25519.h>
#include <zinc/chacha20poly1305.h>
#include <crypto/blake2s.h>
MALLOC_DECLARE(M_WG);
enum noise_lengths {
NOISE_PUBLIC_KEY_LEN = CURVE25519_KEY_SIZE,
NOISE_SYMMETRIC_KEY_LEN = CHACHA20POLY1305_KEY_SIZE,
NOISE_TIMESTAMP_LEN = sizeof(uint64_t) + sizeof(uint32_t),
NOISE_AUTHTAG_LEN = CHACHA20POLY1305_AUTHTAG_SIZE,
NOISE_HASH_LEN = BLAKE2S_HASH_SIZE
};
#define noise_encrypted_len(plain_len) ((plain_len) + NOISE_AUTHTAG_LEN)
enum cookie_values {
COOKIE_SECRET_MAX_AGE = 2 * 60,
COOKIE_SECRET_LATENCY = 5,
COOKIE_NONCE_LEN = XCHACHA20POLY1305_NONCE_SIZE,
COOKIE_LEN = 16
};
enum limits {
REKEY_TIMEOUT = 5,
INITIATIONS_PER_SECOND = 50,
MAX_PEERS_PER_DEVICE = 1U << 20,
KEEPALIVE_TIMEOUT = 10,
MAX_TIMER_HANDSHAKES = 90 / REKEY_TIMEOUT,
MAX_QUEUED_INCOMING_HANDSHAKES = 4096, /* TODO: replace this with DQL */
MAX_STAGED_PACKETS = 128,
MAX_QUEUED_PACKETS = 1024 /* TODO: replace this with DQL */
};
#define zfree(addr, type) \
do { \
explicit_bzero(addr, sizeof(*addr)); \
free(addr, type); \
} while (0)
struct crypt_queue {
union {
struct {
int last_cpu;
};
};
};
#define __ATOMIC_LOAD_SIZE \
({ \
switch (size) { \
case 1: *(uint8_t *)res = *(volatile uint8_t *)p; break; \
case 2: *(uint16_t *)res = *(volatile uint16_t *)p; break; \
case 4: *(uint32_t *)res = *(volatile uint32_t *)p; break; \
case 8: *(uint64_t *)res = *(volatile uint64_t *)p; break; \
} \
})
static inline void
__atomic_load_acq_size(volatile void *p, void *res, int size)
{
__ATOMIC_LOAD_SIZE;
}
#define atomic_load_acq(x) \
({ \
union { __typeof(x) __val; char __c[1]; } __u; \
__atomic_load_acq_size(&(x), __u.__c, sizeof(x)); \
__u.__val; \
})
int wg_ctx_init(void);
void wg_ctx_uninit(void);
#endif


@@ -0,0 +1,286 @@
/*
* Copyright (C) 2015-2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
* Copyright (C) 2019-2020 Matt Dunwoodie <ncon@noconroy.net>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* ======== wg_noise.h ========
*
* This file provides a thread safe interface to the Noise protocol as used in
* WireGuard. The three user facing components are:
*
* * noise_local
* Stores the local state for a noise peer.
* * noise_remote
* Stores the remote state for a noise peer.
* * noise_upcall
* Stores callback routines for index and peers
*
* Additionally a noise_counter, which is invisible to the user, is used to track
* message nonces, to prevent message replay.
*
* This module uses Curve25519 for asymmetric crypto, and ChaCha20Poly1305 for
* symmetric crypto. The handshake uses ephemeral keys, which provide perfect
* forward secrecy. Keys are NOISE_KEY_SIZE (32) bytes long and can be
* generated with a CSPRNG. While this module will clamp the key to form a valid
* Curve25519 key, it is recommended that keys are stored in Curve25519 form to
* preserve interoperability with other systems. Additionally, there is an
* optional PresharedKey of length NOISE_PSK_SIZE (also 32 bytes), which when
* used, will provide protection against known quantum attacks. Without it,
* Curve25519 is broken by Shor's algorithm.
*
* -------- noise_local --------
*
* void noise_local_init(noise_local *, noise_upcall *)
* - Initialise noise_local, should only be called once and before use.
*
* int noise_local_set_private(noise_local *, uint8_t *private)
* - Set the local private key. This will also calculate the corresponding
* public key.
*
* int noise_local_keys(noise_local *, uint8_t *public, uint8_t *private)
* - Get the local keys. It will ensure that a key has been set and if
* not, will return ENXIO.
*
* -------- noise_remote --------
*
* void noise_remote_init(noise_remote *, uint8_t *public)
* - Initialise noise_remote, should only be called once and before use. Key
* must be provided and it cannot be changed once set.
*
* void noise_remote_set_psk(noise_remote *, uint8_t *psk)
* - Set the shared key. To remove the shared key, set a key of all 0x00.
*
* void noise_remote_keys(noise_remote *, uint8_t *public, uint8_t *psk)
* - Get the remote keys.
*
* -------- noise_upcall --------
*
* The noise_upcall struct is used to lookup incoming public keys, as well as
* allocate and deallocate index for a remote. The allocation and deallocation
* are serialised per noise_remote and guaranteed to only have 3 allocated
* indexes at once.
*
* u_arg - passed to callback functions as void *
* u_get_remote - lookup noise_remote based on public key.
* u_set_index - allocate index for noise_remote. any further packets that
* arrive with this index should be passed to noise_* functions
* with the corresponding noise_remote.
* u_drop_index - deallocate index passed to callback.
*
* -------- crypto --------
*
* The following functions are used for the crypto side of things:
*
* int noise_create_initiation(noise_remote *, noise_initiation *)
* int noise_consume_initiation(noise_local *, noise_remote **, noise_initiation *)
* int noise_create_response(noise_remote *, noise_response *)
* int noise_consume_response(noise_remote *, noise_response *)
*
* int noise_remote_begin_session(noise_remote *)
* void noise_remote_clear(noise_remote *)
* void noise_remote_expire_current(noise_remote *)
* int noise_remote_encrypt(noise_remote *, noise_data *, size_t)
* int noise_remote_decrypt(noise_remote *, noise_data *, size_t)
*
* $FreeBSD$
*/
#ifndef __NOISE_H__
#define __NOISE_H__
#include <sys/types.h>
#include <sys/time.h>
#include <sys/rwlock.h>
#include <sys/support.h>
#include <crypto/blake2s.h>
#include <zinc/chacha20poly1305.h>
#include <crypto/curve25519.h>
#define NOISE_KEY_SIZE CURVE25519_KEY_SIZE
#define NOISE_PSK_SIZE 32
#define NOISE_MAC_SIZE CHACHA20POLY1305_AUTHTAG_SIZE
#define NOISE_HASH_SIZE BLAKE2S_HASH_SIZE
#define NOISE_SYMMETRIC_SIZE CHACHA20POLY1305_KEY_SIZE
#define NOISE_TIMESTAMP_SIZE 12
/* Protocol string constants */
#define NOISE_HANDSHAKE_NAME "Noise_IKpsk2_25519_ChaChaPoly_BLAKE2s"
#define NOISE_IDENTIFIER_NAME "WireGuard v1 zx2c4 Jason@zx2c4.com"
/* Constants for the counter */
#define COUNTER_TYPE size_t
#define COUNTER_BITS_TOTAL 512
#define COUNTER_TYPE_BITS (sizeof(COUNTER_TYPE) * 8)
#define COUNTER_TYPE_NUM (COUNTER_BITS_TOTAL / COUNTER_TYPE_BITS)
#define COUNTER_WINDOW_SIZE (COUNTER_BITS_TOTAL - COUNTER_TYPE_BITS)
/* Constants for the keypair */
#define REKEY_AFTER_MESSAGES (1ull << 60)
#define REJECT_AFTER_MESSAGES (UINT64_MAX - COUNTER_WINDOW_SIZE - 1)
#define REKEY_AFTER_TIME 120
#define REKEY_AFTER_TIME_RECV 165
#define REJECT_AFTER_TIME 180
#define REJECT_INTERVAL (1000000000 / 50) /* fifty times per sec */
/* 24 = floor(log2(REJECT_INTERVAL)) */
#define REJECT_INTERVAL_MASK (~((1ull<<24)-1))
enum noise_state_hs {
HS_ZEROED = 0,
CREATED_INITIATION,
CONSUMED_INITIATION,
CREATED_RESPONSE,
CONSUMED_RESPONSE,
};
struct noise_handshake {
enum noise_state_hs hs_state;
uint32_t hs_local_index;
uint32_t hs_remote_index;
uint8_t hs_e[NOISE_KEY_SIZE];
uint8_t hs_hash[NOISE_HASH_SIZE];
uint8_t hs_ck[NOISE_HASH_SIZE];
};
struct noise_counter {
struct rwlock c_lock;
uint64_t c_send;
uint64_t c_recv;
COUNTER_TYPE c_backtrack[COUNTER_TYPE_NUM];
};
enum noise_state_kp {
KP_ZEROED = 0,
INITIATOR,
RESPONDER,
};
struct noise_keypair {
SLIST_ENTRY(noise_keypair) kp_entry;
int kp_valid;
int kp_is_initiator;
uint32_t kp_local_index;
uint32_t kp_remote_index;
uint8_t kp_send[NOISE_SYMMETRIC_SIZE];
uint8_t kp_recv[NOISE_SYMMETRIC_SIZE];
struct timespec kp_birthdate; /* nanouptime */
struct noise_counter kp_ctr;
};
struct noise_remote {
uint8_t r_public[NOISE_KEY_SIZE];
struct noise_local *r_local;
uint8_t r_ss[NOISE_KEY_SIZE];
struct rwlock r_handshake_lock;
struct noise_handshake r_handshake;
uint8_t r_psk[NOISE_PSK_SIZE];
uint8_t r_timestamp[NOISE_TIMESTAMP_SIZE];
struct timespec r_last_init; /* nanouptime */
struct rwlock r_keypair_lock;
SLIST_HEAD(,noise_keypair) r_unused_keypairs;
struct noise_keypair *r_next, *r_current, *r_previous;
struct noise_keypair r_keypair[3]; /* 3: next, current, previous. */
};
struct noise_local {
struct rwlock l_identity_lock;
int l_has_identity;
uint8_t l_public[NOISE_KEY_SIZE];
uint8_t l_private[NOISE_KEY_SIZE];
struct noise_upcall {
void *u_arg;
struct noise_remote *
(*u_remote_get)(void *, uint8_t[NOISE_KEY_SIZE]);
uint32_t
(*u_index_set)(void *, struct noise_remote *);
void (*u_index_drop)(void *, uint32_t);
} l_upcall;
};
struct noise_initiation {
uint32_t s_idx;
uint8_t ue[NOISE_KEY_SIZE];
uint8_t es[NOISE_KEY_SIZE + NOISE_MAC_SIZE];
uint8_t ets[NOISE_TIMESTAMP_SIZE + NOISE_MAC_SIZE];
} __packed;
struct noise_response {
uint32_t s_idx;
uint32_t r_idx;
uint8_t ue[NOISE_KEY_SIZE];
uint8_t en[0 + NOISE_MAC_SIZE];
} __packed;
struct noise_data {
uint32_t r_idx;
uint64_t nonce;
uint8_t buf[];
} __packed;
/* Set/Get noise parameters */
void noise_local_init(struct noise_local *, struct noise_upcall *);
void noise_local_lock_identity(struct noise_local *);
void noise_local_unlock_identity(struct noise_local *);
int noise_local_set_private(struct noise_local *, uint8_t[NOISE_KEY_SIZE]);
int noise_local_keys(struct noise_local *, uint8_t[NOISE_KEY_SIZE],
uint8_t[NOISE_KEY_SIZE]);
void noise_remote_init(struct noise_remote *, const uint8_t[NOISE_KEY_SIZE],
struct noise_local *);
int noise_remote_set_psk(struct noise_remote *, const uint8_t[NOISE_PSK_SIZE]);
int noise_remote_keys(struct noise_remote *, uint8_t[NOISE_KEY_SIZE],
uint8_t[NOISE_PSK_SIZE]);
/* Should be called anytime noise_local_set_private is called */
void noise_remote_precompute(struct noise_remote *);
/* Cryptographic functions */
int noise_create_initiation(
struct noise_remote *,
struct noise_initiation *);
int noise_consume_initiation(
struct noise_local *,
struct noise_remote **,
struct noise_initiation *);
int noise_create_response(
struct noise_remote *,
struct noise_response *);
int noise_consume_response(
struct noise_remote *,
struct noise_response *);
int noise_remote_begin_session(struct noise_remote *);
void noise_remote_clear(struct noise_remote *);
void noise_remote_expire_current(struct noise_remote *);
int noise_remote_ready(struct noise_remote *);
int noise_remote_encrypt(
struct noise_remote *,
struct noise_data *,
size_t);
int noise_remote_decrypt(
struct noise_remote *,
struct noise_data *,
size_t);
#endif /* __NOISE_H__ */

View File

@ -0,0 +1,50 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _ZINC_BLAKE2S_H
#define _ZINC_BLAKE2S_H
#include <sys/types.h>
enum blake2s_lengths {
BLAKE2S_BLOCK_SIZE = 64,
BLAKE2S_HASH_SIZE = 32,
BLAKE2S_KEY_SIZE = 32
};
struct blake2s_state {
uint32_t h[8];
uint32_t t[2];
uint32_t f[2];
uint8_t buf[BLAKE2S_BLOCK_SIZE];
unsigned int buflen;
unsigned int outlen;
};
void blake2s_init(struct blake2s_state *state, const size_t outlen);
void blake2s_init_key(struct blake2s_state *state, const size_t outlen,
const void *key, const size_t keylen);
void blake2s_update(struct blake2s_state *state, const uint8_t *in, size_t inlen);
void blake2s_final(struct blake2s_state *state, uint8_t *out, const size_t outlen);
static inline void blake2s(uint8_t *out, const uint8_t *in, const uint8_t *key,
const size_t outlen, const size_t inlen,
const size_t keylen)
{
struct blake2s_state state;
if (keylen)
blake2s_init_key(&state, outlen, key, keylen);
else
blake2s_init(&state, outlen);
blake2s_update(&state, in, inlen);
blake2s_final(&state, out, outlen);
}
void blake2s_hmac(uint8_t *out, const uint8_t *in, const uint8_t *key, const size_t outlen,
const size_t inlen, const size_t keylen);
#endif /* _ZINC_BLAKE2S_H */

View File

@ -0,0 +1,68 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _ZINC_CHACHA20_H
#define _ZINC_CHACHA20_H
#include <sys/param.h>
#include <sys/support.h>
enum chacha20_lengths {
CHACHA20_NONCE_SIZE = 16,
CHACHA20_KEY_SIZE = 32,
CHACHA20_KEY_WORDS = CHACHA20_KEY_SIZE / sizeof(u32),
CHACHA20_BLOCK_SIZE = 64,
CHACHA20_BLOCK_WORDS = CHACHA20_BLOCK_SIZE / sizeof(u32),
HCHACHA20_NONCE_SIZE = CHACHA20_NONCE_SIZE,
HCHACHA20_KEY_SIZE = CHACHA20_KEY_SIZE
};
enum chacha20_constants { /* expand 32-byte k */
CHACHA20_CONSTANT_EXPA = 0x61707865U,
CHACHA20_CONSTANT_ND_3 = 0x3320646eU,
CHACHA20_CONSTANT_2_BY = 0x79622d32U,
CHACHA20_CONSTANT_TE_K = 0x6b206574U
};
struct chacha20_ctx {
union {
u32 state[16];
struct {
u32 constant[4];
u32 key[8];
u32 counter[4];
};
};
};
static inline void chacha20_init(struct chacha20_ctx *ctx,
const u8 key[CHACHA20_KEY_SIZE],
const u64 nonce)
{
ctx->constant[0] = CHACHA20_CONSTANT_EXPA;
ctx->constant[1] = CHACHA20_CONSTANT_ND_3;
ctx->constant[2] = CHACHA20_CONSTANT_2_BY;
ctx->constant[3] = CHACHA20_CONSTANT_TE_K;
ctx->key[0] = get_unaligned_le32(key + 0);
ctx->key[1] = get_unaligned_le32(key + 4);
ctx->key[2] = get_unaligned_le32(key + 8);
ctx->key[3] = get_unaligned_le32(key + 12);
ctx->key[4] = get_unaligned_le32(key + 16);
ctx->key[5] = get_unaligned_le32(key + 20);
ctx->key[6] = get_unaligned_le32(key + 24);
ctx->key[7] = get_unaligned_le32(key + 28);
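/*
 * Words 12-13 of the state are the 64-bit block counter (started at
 * zero); words 14-15 carry the caller's 64-bit little-endian nonce.
 */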
ctx->counter[0] = 0;
ctx->counter[1] = 0;
ctx->counter[2] = nonce & U32_MAX;
ctx->counter[3] = nonce >> 32;
}
void chacha20(struct chacha20_ctx *ctx, u8 *dst, const u8 *src, u32 len,
simd_context_t *simd_context);
void hchacha20(u32 derived_key[CHACHA20_KEY_WORDS],
const u8 nonce[HCHACHA20_NONCE_SIZE],
const u8 key[HCHACHA20_KEY_SIZE], simd_context_t *simd_context);
#endif /* _ZINC_CHACHA20_H */

View File

@ -0,0 +1,48 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _ZINC_CHACHA20POLY1305_H
#define _ZINC_CHACHA20POLY1305_H
#include <sys/types.h>
#include <sys/support.h>	/* simd_context_t */
struct scatterlist;
enum chacha20poly1305_lengths {
XCHACHA20POLY1305_NONCE_SIZE = 24,
CHACHA20POLY1305_KEY_SIZE = 32,
CHACHA20POLY1305_AUTHTAG_SIZE = 16
};
void chacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src, const size_t src_len,
const uint8_t *ad, const size_t ad_len,
const uint64_t nonce,
const uint8_t key[CHACHA20POLY1305_KEY_SIZE]);
bool chacha20poly1305_encrypt_sg_inplace(
struct scatterlist *src, const size_t src_len, const uint8_t *ad,
const size_t ad_len, const uint64_t nonce,
const uint8_t key[CHACHA20POLY1305_KEY_SIZE], simd_context_t *simd_context);
bool chacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src, const size_t src_len,
const uint8_t *ad, const size_t ad_len, const uint64_t nonce,
const uint8_t key[CHACHA20POLY1305_KEY_SIZE]);
bool chacha20poly1305_decrypt_sg_inplace(
struct scatterlist *src, size_t src_len, const uint8_t *ad,
const size_t ad_len, const uint64_t nonce,
const uint8_t key[CHACHA20POLY1305_KEY_SIZE], simd_context_t *simd_context);
void xchacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src, const size_t src_len,
const uint8_t *ad, const size_t ad_len,
const uint8_t nonce[XCHACHA20POLY1305_NONCE_SIZE],
const uint8_t key[CHACHA20POLY1305_KEY_SIZE]);
bool xchacha20poly1305_decrypt(
uint8_t *dst, const uint8_t *src, const size_t src_len, const uint8_t *ad,
const size_t ad_len, const uint8_t nonce[XCHACHA20POLY1305_NONCE_SIZE],
const uint8_t key[CHACHA20POLY1305_KEY_SIZE]);
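/*
 * Minimal usage sketch for the one-shot interface above (illustrative;
 * the buffer names are invented for the example):
 *
 *	uint8_t key[CHACHA20POLY1305_KEY_SIZE];	// 32-byte secret key
 *	uint8_t pt[64];				// plaintext
 *	uint8_t ct[sizeof(pt) + CHACHA20POLY1305_AUTHTAG_SIZE];
 *	uint64_t nonce = 1;			// must never repeat per key
 *
 *	chacha20poly1305_encrypt(ct, pt, sizeof(pt), NULL, 0, nonce, key);
 *	if (!chacha20poly1305_decrypt(pt, ct, sizeof(ct), NULL, 0, nonce, key))
 *		;				// authentication failed
 */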
#endif /* _ZINC_CHACHA20POLY1305_H */

View File

@ -0,0 +1,28 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _ZINC_CURVE25519_H
#define _ZINC_CURVE25519_H
#include <sys/types.h>
enum curve25519_lengths {
CURVE25519_KEY_SIZE = 32
};
bool curve25519(uint8_t mypublic[CURVE25519_KEY_SIZE],
const uint8_t secret[CURVE25519_KEY_SIZE],
const uint8_t basepoint[CURVE25519_KEY_SIZE]);
void curve25519_generate_secret(uint8_t secret[CURVE25519_KEY_SIZE]);
bool curve25519_generate_public(
uint8_t pub[CURVE25519_KEY_SIZE], const uint8_t secret[CURVE25519_KEY_SIZE]);
static inline void curve25519_clamp_secret(uint8_t secret[CURVE25519_KEY_SIZE])
{
secret[0] &= 248;
secret[31] = (secret[31] & 127) | 64;
}
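/*
 * X25519 key-agreement sketch using the primitives above (illustrative
 * only; variable names are invented):
 *
 *	uint8_t sec_a[CURVE25519_KEY_SIZE], pub_a[CURVE25519_KEY_SIZE];
 *	uint8_t sec_b[CURVE25519_KEY_SIZE], pub_b[CURVE25519_KEY_SIZE];
 *	uint8_t ss_a[CURVE25519_KEY_SIZE], ss_b[CURVE25519_KEY_SIZE];
 *
 *	curve25519_generate_secret(sec_a);
 *	curve25519_generate_public(pub_a, sec_a);
 *	curve25519_generate_secret(sec_b);
 *	curve25519_generate_public(pub_b, sec_b);
 *	curve25519(ss_a, sec_a, pub_b);		// ss_a == ss_b: the
 *	curve25519(ss_b, sec_b, pub_a);		// shared secret
 */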
#endif /* _ZINC_CURVE25519_H */

View File

@ -0,0 +1,29 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _ZINC_POLY1305_H
#define _ZINC_POLY1305_H
#include <sys/types.h>
#include <sys/support.h>	/* u8/u32/u64, simd_context_t, __aligned */
enum poly1305_lengths {
POLY1305_BLOCK_SIZE = 16,
POLY1305_KEY_SIZE = 32,
POLY1305_MAC_SIZE = 16
};
struct poly1305_ctx {
u8 opaque[24 * sizeof(u64)];
u32 nonce[4];
u8 data[POLY1305_BLOCK_SIZE];
size_t num;
} __aligned(8);
void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE]);
void poly1305_update(struct poly1305_ctx *ctx, const u8 *input, size_t len,
simd_context_t *simd_context);
void poly1305_final(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE],
simd_context_t *simd_context);
#endif /* _ZINC_POLY1305_H */

View File

@ -0,0 +1,256 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2012 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*
* This is an implementation of the BLAKE2s hash and PRF functions.
*
* Information: https://blake2.net/
*
*/
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/endian.h>
#include <crypto/blake2s.h>
static inline uint32_t
ror32(uint32_t word, unsigned int shift)
{
return (word >> shift) | (word << (32 - shift));
}
typedef union {
struct {
uint8_t digest_length;
uint8_t key_length;
uint8_t fanout;
uint8_t depth;
uint32_t leaf_length;
uint32_t node_offset;
uint16_t xof_length;
uint8_t node_depth;
uint8_t inner_length;
uint8_t salt[8];
uint8_t personal[8];
};
uint32_t words[8];
} __packed blake2s_param;
static const uint32_t blake2s_iv[8] = {
0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
};
static const uint8_t blake2s_sigma[10][16] = {
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
};
static inline void blake2s_set_lastblock(struct blake2s_state *state)
{
if (state->last_node)
state->f[1] = -1;
state->f[0] = -1;
}
static inline void blake2s_increment_counter(struct blake2s_state *state,
const uint32_t inc)
{
state->t[0] += inc;
state->t[1] += (state->t[0] < inc);
}
static inline void blake2s_init_param(struct blake2s_state *state,
const blake2s_param *param)
{
int i;
memset(state, 0, sizeof(*state));
for (i = 0; i < 8; ++i)
state->h[i] = blake2s_iv[i] ^ le32toh(param->words[i]);
}
void blake2s_init(struct blake2s_state *state, const size_t outlen)
{
blake2s_param param __aligned(__alignof__(uint32_t)) = {
.digest_length = outlen,
.fanout = 1,
.depth = 1
};
/*WARN_ON(IS_ENABLED(DEBUG) && (!outlen || outlen > BLAKE2S_HASH_SIZE));*/
blake2s_init_param(state, &param);
}
void blake2s_init_key(struct blake2s_state *state, const size_t outlen,
const void *key, const size_t keylen)
{
blake2s_param param = { .digest_length = outlen,
.key_length = keylen,
.fanout = 1,
.depth = 1 };
uint8_t block[BLAKE2S_BLOCK_SIZE] = { 0 };
/*WARN_ON(IS_ENABLED(DEBUG) && (!outlen || outlen > BLAKE2S_HASH_SIZE ||
!key || !keylen || keylen > BLAKE2S_KEY_SIZE));*/
blake2s_init_param(state, &param);
memcpy(block, key, keylen);
blake2s_update(state, block, BLAKE2S_BLOCK_SIZE);
explicit_bzero(block, BLAKE2S_BLOCK_SIZE);
}
static inline void blake2s_compress(struct blake2s_state *state,
const uint8_t *block, size_t nblocks,
const uint32_t inc)
{
uint32_t m[16];
uint32_t v[16];
int i;
/*WARN_ON(IS_ENABLED(DEBUG) &&
(nblocks > 1 && inc != BLAKE2S_BLOCK_SIZE));*/
while (nblocks > 0) {
blake2s_increment_counter(state, inc);
memcpy(m, block, BLAKE2S_BLOCK_SIZE);
for (i = 0; i < (int)(sizeof(m) / sizeof(m[0])); i++)
m[i] = le32toh(m[i]);
memcpy(v, state->h, 32);
v[ 8] = blake2s_iv[0];
v[ 9] = blake2s_iv[1];
v[10] = blake2s_iv[2];
v[11] = blake2s_iv[3];
v[12] = blake2s_iv[4] ^ state->t[0];
v[13] = blake2s_iv[5] ^ state->t[1];
v[14] = blake2s_iv[6] ^ state->f[0];
v[15] = blake2s_iv[7] ^ state->f[1];
#define G(r, i, a, b, c, d) do { \
a += b + m[blake2s_sigma[r][2 * i + 0]]; \
d = ror32(d ^ a, 16); \
c += d; \
b = ror32(b ^ c, 12); \
a += b + m[blake2s_sigma[r][2 * i + 1]]; \
d = ror32(d ^ a, 8); \
c += d; \
b = ror32(b ^ c, 7); \
} while (0)
#define ROUND(r) do { \
G(r, 0, v[0], v[ 4], v[ 8], v[12]); \
G(r, 1, v[1], v[ 5], v[ 9], v[13]); \
G(r, 2, v[2], v[ 6], v[10], v[14]); \
G(r, 3, v[3], v[ 7], v[11], v[15]); \
G(r, 4, v[0], v[ 5], v[10], v[15]); \
G(r, 5, v[1], v[ 6], v[11], v[12]); \
G(r, 6, v[2], v[ 7], v[ 8], v[13]); \
G(r, 7, v[3], v[ 4], v[ 9], v[14]); \
} while (0)
ROUND(0);
ROUND(1);
ROUND(2);
ROUND(3);
ROUND(4);
ROUND(5);
ROUND(6);
ROUND(7);
ROUND(8);
ROUND(9);
#undef G
#undef ROUND
for (i = 0; i < 8; ++i)
state->h[i] ^= v[i] ^ v[i + 8];
block += BLAKE2S_BLOCK_SIZE;
--nblocks;
}
}
void blake2s_update(struct blake2s_state *state, const uint8_t *in, size_t inlen)
{
const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
if (!inlen)
return;
if (inlen > fill) {
memcpy(state->buf + state->buflen, in, fill);
blake2s_compress(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
state->buflen = 0;
in += fill;
inlen -= fill;
}
if (inlen > BLAKE2S_BLOCK_SIZE) {
const size_t nblocks =
(inlen + BLAKE2S_BLOCK_SIZE - 1) / BLAKE2S_BLOCK_SIZE;
/* Hash one less (full) block than strictly possible */
blake2s_compress(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
}
memcpy(state->buf + state->buflen, in, inlen);
state->buflen += inlen;
}
void blake2s_final(struct blake2s_state *state, uint8_t *out, const size_t outlen)
{
int i;
/*WARN_ON(IS_ENABLED(DEBUG) &&
(!out || !outlen || outlen > BLAKE2S_HASH_SIZE));*/
blake2s_set_lastblock(state);
memset(state->buf + state->buflen, 0,
BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
blake2s_compress(state, state->buf, 1, state->buflen);
for (i = 0; i < (int)(sizeof(state->h) / sizeof(state->h[0])); i++)
state->h[i] = htole32(state->h[i]);
memcpy(out, state->h, outlen);
explicit_bzero(state, sizeof(*state));
}
void blake2s_hmac(uint8_t *out, const uint8_t *in, const uint8_t *key, const size_t outlen,
const size_t inlen, const size_t keylen)
{
struct blake2s_state state;
uint8_t x_key[BLAKE2S_BLOCK_SIZE] __aligned(__alignof__(uint32_t)) = { 0 };
uint8_t i_hash[BLAKE2S_HASH_SIZE] __aligned(__alignof__(uint32_t));
int i;
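/* Per RFC 2104, keys longer than the 64-byte block are hashed down first. */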
if (keylen > BLAKE2S_BLOCK_SIZE) {
blake2s_init(&state, BLAKE2S_HASH_SIZE);
blake2s_update(&state, key, keylen);
blake2s_final(&state, x_key, BLAKE2S_HASH_SIZE);
} else
memcpy(x_key, key, keylen);
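/* Inner pass: hash (K ^ ipad) || message, where ipad is 0x36 repeated. */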
for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i)
x_key[i] ^= 0x36;
blake2s_init(&state, BLAKE2S_HASH_SIZE);
blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE);
blake2s_update(&state, in, inlen);
blake2s_final(&state, i_hash, BLAKE2S_HASH_SIZE);
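/* Outer pass: hash (K ^ opad) || inner digest; XORing with 0x5c ^ 0x36 turns the ipad-masked key into the opad-masked one. */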
for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i)
x_key[i] ^= 0x5c ^ 0x36;
blake2s_init(&state, BLAKE2S_HASH_SIZE);
blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE);
blake2s_update(&state, i_hash, BLAKE2S_HASH_SIZE);
blake2s_final(&state, i_hash, BLAKE2S_HASH_SIZE);
memcpy(out, i_hash, outlen);
explicit_bzero(x_key, BLAKE2S_BLOCK_SIZE);
explicit_bzero(i_hash, BLAKE2S_HASH_SIZE);
}

View File

@ -0,0 +1,58 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include <sys/types.h>
#ifndef _BLAKE2S_H_
#define _BLAKE2S_H_
/*#define WARN_ON(a) if(a) printf("%s failed at %s:%d\n", #a, __FILE__, __LINE__)
#define IS_ENABLED(...) true*/
enum blake2s_lengths {
BLAKE2S_BLOCK_SIZE = 64,
BLAKE2S_HASH_SIZE = 32,
BLAKE2S_KEY_SIZE = 32
};
struct blake2s_state {
uint32_t h[8];
uint32_t t[2];
uint32_t f[2];
uint8_t buf[BLAKE2S_BLOCK_SIZE];
size_t buflen;
uint8_t last_node;
};
void blake2s_init(struct blake2s_state *state, const size_t outlen);
void blake2s_init_key(struct blake2s_state *state, const size_t outlen,
const void *key, const size_t keylen);
void blake2s_update(struct blake2s_state *state, const uint8_t *in, size_t inlen);
void blake2s_final(struct blake2s_state *state, uint8_t *out, const size_t outlen);
static inline void blake2s(uint8_t *out, const uint8_t *in, const uint8_t *key,
const size_t outlen, const size_t inlen,
const size_t keylen)
{
struct blake2s_state state;
/*WARN_ON(IS_ENABLED(DEBUG) && ((!in && inlen > 0) || !out || !outlen ||
outlen > BLAKE2S_HASH_SIZE || keylen > BLAKE2S_KEY_SIZE ||
(!key && keylen)));*/
if (keylen)
blake2s_init_key(&state, outlen, key, keylen);
else
blake2s_init(&state, outlen);
blake2s_update(&state, in, inlen);
blake2s_final(&state, out, outlen);
}
void blake2s_hmac(uint8_t *out, const uint8_t *in, const uint8_t *key,
const size_t outlen, const size_t inlen, const size_t keylen);
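/*
 * Usage sketch for the one-shot wrapper above (illustrative; the message
 * and key are placeholders):
 *
 *	uint8_t digest[BLAKE2S_HASH_SIZE];
 *	static const uint8_t msg[] = "abc";
 *	static const uint8_t key[BLAKE2S_KEY_SIZE] = { 0 };
 *
 *	blake2s(digest, msg, key, sizeof(digest), sizeof(msg) - 1, sizeof(key));
 *	// unkeyed: blake2s(digest, msg, NULL, sizeof(digest), sizeof(msg) - 1, 0);
 */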
#endif /* _BLAKE2S_H_ */

File diff suppressed because it is too large

View File

@ -0,0 +1,98 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include <asm/hwcap.h>
#include <asm/neon.h>
#if defined(CONFIG_ZINC_ARCH_ARM)
#include <asm/system_info.h>
#include <asm/cputype.h>
#endif
asmlinkage void chacha20_arm(u8 *out, const u8 *in, const size_t len,
const u32 key[8], const u32 counter[4]);
asmlinkage void hchacha20_arm(const u32 state[16], u32 out[8]);
asmlinkage void chacha20_neon(u8 *out, const u8 *in, const size_t len,
const u32 key[8], const u32 counter[4]);
static bool chacha20_use_neon __ro_after_init;
static bool *const chacha20_nobs[] __initconst = { &chacha20_use_neon };
static void __init chacha20_fpu_init(void)
{
#if defined(CONFIG_ZINC_ARCH_ARM64)
chacha20_use_neon = cpu_have_named_feature(ASIMD);
#elif defined(CONFIG_ZINC_ARCH_ARM)
switch (read_cpuid_part()) {
case ARM_CPU_PART_CORTEX_A7:
case ARM_CPU_PART_CORTEX_A5:
/* The Cortex-A7 and Cortex-A5 do not perform well with the NEON
 * implementation but do incredibly well with the scalar one and use
 * less power.
 */
break;
default:
chacha20_use_neon = elf_hwcap & HWCAP_NEON;
}
#endif
}
static inline bool chacha20_arch(struct chacha20_ctx *ctx, u8 *dst,
const u8 *src, size_t len,
simd_context_t *simd_context)
{
/* SIMD disables preemption, so relax after processing each page. */
BUILD_BUG_ON(PAGE_SIZE < CHACHA20_BLOCK_SIZE ||
PAGE_SIZE % CHACHA20_BLOCK_SIZE);
for (;;) {
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && chacha20_use_neon &&
len >= CHACHA20_BLOCK_SIZE * 3 && simd_use(simd_context)) {
const size_t bytes = min_t(size_t, len, PAGE_SIZE);
chacha20_neon(dst, src, bytes, ctx->key, ctx->counter);
ctx->counter[0] += (bytes + 63) / 64;
len -= bytes;
if (!len)
break;
dst += bytes;
src += bytes;
simd_relax(simd_context);
} else {
chacha20_arm(dst, src, len, ctx->key, ctx->counter);
ctx->counter[0] += (len + 63) / 64;
break;
}
}
return true;
}
static inline bool hchacha20_arch(u32 derived_key[CHACHA20_KEY_WORDS],
const u8 nonce[HCHACHA20_NONCE_SIZE],
const u8 key[HCHACHA20_KEY_SIZE],
simd_context_t *simd_context)
{
if (IS_ENABLED(CONFIG_ZINC_ARCH_ARM)) {
u32 x[] = { CHACHA20_CONSTANT_EXPA,
CHACHA20_CONSTANT_ND_3,
CHACHA20_CONSTANT_2_BY,
CHACHA20_CONSTANT_TE_K,
get_unaligned_le32(key + 0),
get_unaligned_le32(key + 4),
get_unaligned_le32(key + 8),
get_unaligned_le32(key + 12),
get_unaligned_le32(key + 16),
get_unaligned_le32(key + 20),
get_unaligned_le32(key + 24),
get_unaligned_le32(key + 28),
get_unaligned_le32(nonce + 0),
get_unaligned_le32(nonce + 4),
get_unaligned_le32(nonce + 8),
get_unaligned_le32(nonce + 12)
};
hchacha20_arm(x, derived_key);
return true;
}
return false;
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,27 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
asmlinkage void chacha20_mips(u32 state[16], u8 *out, const u8 *in,
const size_t len);
static bool *const chacha20_nobs[] __initconst = { };
static void __init chacha20_fpu_init(void)
{
}
static inline bool chacha20_arch(struct chacha20_ctx *ctx, u8 *dst,
const u8 *src, size_t len,
simd_context_t *simd_context)
{
chacha20_mips(ctx->state, dst, src, len);
return true;
}
static inline bool hchacha20_arch(u32 derived_key[CHACHA20_KEY_WORDS],
const u8 nonce[HCHACHA20_NONCE_SIZE],
const u8 key[HCHACHA20_KEY_SIZE],
simd_context_t *simd_context)
{
return false;
}

View File

@ -0,0 +1,424 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com>. All Rights Reserved.
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#define MASK_U32 0x3c
#define CHACHA20_BLOCK_SIZE 64
#define STACK_SIZE 32
#define X0 $t0
#define X1 $t1
#define X2 $t2
#define X3 $t3
#define X4 $t4
#define X5 $t5
#define X6 $t6
#define X7 $t7
#define X8 $t8
#define X9 $t9
#define X10 $v1
#define X11 $s6
#define X12 $s5
#define X13 $s4
#define X14 $s3
#define X15 $s2
/* Use regs which are overwritten on exit for Tx so we don't leak clear data. */
#define T0 $s1
#define T1 $s0
#define T(n) T ## n
#define X(n) X ## n
/* Input arguments */
#define STATE $a0
#define OUT $a1
#define IN $a2
#define BYTES $a3
/* Output argument */
/* NONCE[0] is kept in a register and not in memory.
 * We don't want to touch the original value in memory.
 * It must be incremented every loop iteration.
 */
#define NONCE_0 $v0
/* SAVED_X and SAVED_CA are set in the jump table.
 * Use regs which are overwritten on exit so we don't leak clear data.
 * They are used to handle the last bytes, which are not a multiple of 4.
 */
#define SAVED_X X15
#define SAVED_CA $s7
#define IS_UNALIGNED $s7
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define MSB 0
#define LSB 3
#define ROTx rotl
#define ROTR(n) rotr n, 24
#define CPU_TO_LE32(n) \
wsbh n; \
rotr n, 16;
#else
#define MSB 3
#define LSB 0
#define ROTx rotr
#define CPU_TO_LE32(n)
#define ROTR(n)
#endif
#define FOR_EACH_WORD(x) \
x( 0); \
x( 1); \
x( 2); \
x( 3); \
x( 4); \
x( 5); \
x( 6); \
x( 7); \
x( 8); \
x( 9); \
x(10); \
x(11); \
x(12); \
x(13); \
x(14); \
x(15);
#define FOR_EACH_WORD_REV(x) \
x(15); \
x(14); \
x(13); \
x(12); \
x(11); \
x(10); \
x( 9); \
x( 8); \
x( 7); \
x( 6); \
x( 5); \
x( 4); \
x( 3); \
x( 2); \
x( 1); \
x( 0);
#define PLUS_ONE_0 1
#define PLUS_ONE_1 2
#define PLUS_ONE_2 3
#define PLUS_ONE_3 4
#define PLUS_ONE_4 5
#define PLUS_ONE_5 6
#define PLUS_ONE_6 7
#define PLUS_ONE_7 8
#define PLUS_ONE_8 9
#define PLUS_ONE_9 10
#define PLUS_ONE_10 11
#define PLUS_ONE_11 12
#define PLUS_ONE_12 13
#define PLUS_ONE_13 14
#define PLUS_ONE_14 15
#define PLUS_ONE_15 16
#define PLUS_ONE(x) PLUS_ONE_ ## x
#define _CONCAT3(a,b,c) a ## b ## c
#define CONCAT3(a,b,c) _CONCAT3(a,b,c)
#define STORE_UNALIGNED(x) \
CONCAT3(.Lchacha20_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
.if (x != 12); \
lw T0, (x*4)(STATE); \
.endif; \
lwl T1, (x*4)+MSB ## (IN); \
lwr T1, (x*4)+LSB ## (IN); \
.if (x == 12); \
addu X ## x, NONCE_0; \
.else; \
addu X ## x, T0; \
.endif; \
CPU_TO_LE32(X ## x); \
xor X ## x, T1; \
swl X ## x, (x*4)+MSB ## (OUT); \
swr X ## x, (x*4)+LSB ## (OUT);
#define STORE_ALIGNED(x) \
CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
.if (x != 12); \
lw T0, (x*4)(STATE); \
.endif; \
lw T1, (x*4) ## (IN); \
.if (x == 12); \
addu X ## x, NONCE_0; \
.else; \
addu X ## x, T0; \
.endif; \
CPU_TO_LE32(X ## x); \
xor X ## x, T1; \
sw X ## x, (x*4) ## (OUT);
/* Jump table macro.
 * Used for setup and for handling the last bytes, which are not a multiple of 4.
 * X15 is free to store Xn.
 * Every jump table entry must be equal in size.
 */
#define JMPTBL_ALIGNED(x) \
.Lchacha20_mips_jmptbl_aligned_ ## x: ; \
.set noreorder; \
b .Lchacha20_mips_xor_aligned_ ## x ## _b; \
.if (x == 12); \
addu SAVED_X, X ## x, NONCE_0; \
.else; \
addu SAVED_X, X ## x, SAVED_CA; \
.endif; \
.set reorder
#define JMPTBL_UNALIGNED(x) \
.Lchacha20_mips_jmptbl_unaligned_ ## x: ; \
.set noreorder; \
b .Lchacha20_mips_xor_unaligned_ ## x ## _b; \
.if (x == 12); \
addu SAVED_X, X ## x, NONCE_0; \
.else; \
addu SAVED_X, X ## x, SAVED_CA; \
.endif; \
.set reorder
#define AXR(A, B, C, D, K, L, M, N, V, W, Y, Z, S) \
addu X(A), X(K); \
addu X(B), X(L); \
addu X(C), X(M); \
addu X(D), X(N); \
xor X(V), X(A); \
xor X(W), X(B); \
xor X(Y), X(C); \
xor X(Z), X(D); \
rotl X(V), S; \
rotl X(W), S; \
rotl X(Y), S; \
rotl X(Z), S;
.text
.set reorder
.set noat
.globl chacha20_mips
.ent chacha20_mips
chacha20_mips:
.frame $sp, STACK_SIZE, $ra
addiu $sp, -STACK_SIZE
/* Return bytes = 0. */
beqz BYTES, .Lchacha20_mips_end
lw NONCE_0, 48(STATE)
/* Save s0-s7 */
sw $s0, 0($sp)
sw $s1, 4($sp)
sw $s2, 8($sp)
sw $s3, 12($sp)
sw $s4, 16($sp)
sw $s5, 20($sp)
sw $s6, 24($sp)
sw $s7, 28($sp)
/* Test whether IN or OUT is unaligned.
 * IS_UNALIGNED = ( IN | OUT ) & 0x00000003
 */
or IS_UNALIGNED, IN, OUT
andi IS_UNALIGNED, 0x3
/* Set number of rounds */
li $at, 20
b .Lchacha20_rounds_start
.align 4
.Loop_chacha20_rounds:
addiu IN, CHACHA20_BLOCK_SIZE
addiu OUT, CHACHA20_BLOCK_SIZE
addiu NONCE_0, 1
.Lchacha20_rounds_start:
lw X0, 0(STATE)
lw X1, 4(STATE)
lw X2, 8(STATE)
lw X3, 12(STATE)
lw X4, 16(STATE)
lw X5, 20(STATE)
lw X6, 24(STATE)
lw X7, 28(STATE)
lw X8, 32(STATE)
lw X9, 36(STATE)
lw X10, 40(STATE)
lw X11, 44(STATE)
move X12, NONCE_0
lw X13, 52(STATE)
lw X14, 56(STATE)
lw X15, 60(STATE)
.Loop_chacha20_xor_rounds:
addiu $at, -2
AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16);
AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12);
AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8);
AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7);
AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16);
AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12);
AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8);
AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7);
bnez $at, .Loop_chacha20_xor_rounds
addiu BYTES, -(CHACHA20_BLOCK_SIZE)
/* Is data src/dst unaligned? Jump */
bnez IS_UNALIGNED, .Loop_chacha20_unaligned
/* Set number rounds here to fill delayslot. */
li $at, 20
/* BYTES < 0, it has no full block. */
bltz BYTES, .Lchacha20_mips_no_full_block_aligned
FOR_EACH_WORD_REV(STORE_ALIGNED)
/* BYTES > 0? Loop again. */
bgtz BYTES, .Loop_chacha20_rounds
/* Place this here to fill delay slot */
addiu NONCE_0, 1
/* BYTES < 0? Handle last bytes */
bltz BYTES, .Lchacha20_mips_xor_bytes
.Lchacha20_mips_xor_done:
/* Restore used registers */
lw $s0, 0($sp)
lw $s1, 4($sp)
lw $s2, 8($sp)
lw $s3, 12($sp)
lw $s4, 16($sp)
lw $s5, 20($sp)
lw $s6, 24($sp)
lw $s7, 28($sp)
/* Write NONCE_0 back to right location in state */
sw NONCE_0, 48(STATE)
.Lchacha20_mips_end:
addiu $sp, STACK_SIZE
jr $ra
.Lchacha20_mips_no_full_block_aligned:
/* Restore the offset on BYTES */
addiu BYTES, CHACHA20_BLOCK_SIZE
/* Get number of full WORDS */
andi $at, BYTES, MASK_U32
/* Load upper half of jump table addr */
lui T0, %hi(.Lchacha20_mips_jmptbl_aligned_0)
/* Calculate lower half jump table offset */
ins T0, $at, 1, 6
/* Add offset to STATE */
addu T1, STATE, $at
/* Add lower half jump table addr */
addiu T0, %lo(.Lchacha20_mips_jmptbl_aligned_0)
/* Read value from STATE */
lw SAVED_CA, 0(T1)
/* Store remaining bytecounter as negative value */
subu BYTES, $at, BYTES
jr T0
/* Jump table */
FOR_EACH_WORD(JMPTBL_ALIGNED)
.Loop_chacha20_unaligned:
/* Set number rounds here to fill delayslot. */
li $at, 20
/* BYTES < 0, it has no full block. */
bltz BYTES, .Lchacha20_mips_no_full_block_unaligned
FOR_EACH_WORD_REV(STORE_UNALIGNED)
/* BYTES > 0? Loop again. */
bgtz BYTES, .Loop_chacha20_rounds
/* Write NONCE_0 back to right location in state */
sw NONCE_0, 48(STATE)
.set noreorder
/* Fall through to byte handling */
bgez BYTES, .Lchacha20_mips_xor_done
.Lchacha20_mips_xor_unaligned_0_b:
.Lchacha20_mips_xor_aligned_0_b:
/* Place this here to fill delay slot */
addiu NONCE_0, 1
.set reorder
.Lchacha20_mips_xor_bytes:
addu IN, $at
addu OUT, $at
/* First byte */
lbu T1, 0(IN)
addiu $at, BYTES, 1
CPU_TO_LE32(SAVED_X)
ROTR(SAVED_X)
xor T1, SAVED_X
sb T1, 0(OUT)
beqz $at, .Lchacha20_mips_xor_done
/* Second byte */
lbu T1, 1(IN)
addiu $at, BYTES, 2
ROTx SAVED_X, 8
xor T1, SAVED_X
sb T1, 1(OUT)
beqz $at, .Lchacha20_mips_xor_done
/* Third byte */
lbu T1, 2(IN)
ROTx SAVED_X, 8
xor T1, SAVED_X
sb T1, 2(OUT)
b .Lchacha20_mips_xor_done
.Lchacha20_mips_no_full_block_unaligned:
/* Restore the offset on BYTES */
addiu BYTES, CHACHA20_BLOCK_SIZE
/* Get number of full WORDS */
andi $at, BYTES, MASK_U32
/* Load upper half of jump table addr */
lui T0, %hi(.Lchacha20_mips_jmptbl_unaligned_0)
/* Calculate lower half jump table offset */
ins T0, $at, 1, 6
/* Add offset to STATE */
addu T1, STATE, $at
/* Add lower half jump table addr */
addiu T0, %lo(.Lchacha20_mips_jmptbl_unaligned_0)
/* Read value from STATE */
lw SAVED_CA, 0(T1)
/* Store remaining bytecounter as negative value */
subu BYTES, $at, BYTES
jr T0
/* Jump table */
FOR_EACH_WORD(JMPTBL_UNALIGNED)
.end chacha20_mips
.set at

View File

@ -0,0 +1,461 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2018 Google, Inc.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
/*
* Design notes:
*
* 16 registers would be needed to hold the state matrix, but only 14 are
* available because 'sp' and 'pc' cannot be used. So we spill the elements
* (x8, x9) to the stack and swap them out with (x10, x11). This adds one
* 'ldrd' and one 'strd' instruction per round.
*
* All rotates are performed using the implicit rotate operand accepted by the
* 'add' and 'eor' instructions. This is faster than using explicit rotate
* instructions. To make this work, we allow the values in the second and last
* rows of the ChaCha state matrix (rows 'b' and 'd') to temporarily have the
* wrong rotation amount. The rotation amount is then fixed up just in time
* when the values are used. 'brot' is the number of bits the values in row 'b'
* need to be rotated right to arrive at the correct values, and 'drot'
* similarly for row 'd'. (brot, drot) start out as (0, 0) but we make it such
* that they end up as (25, 24) after every round.
*/
// ChaCha state registers
X0 .req r0
X1 .req r1
X2 .req r2
X3 .req r3
X4 .req r4
X5 .req r5
X6 .req r6
X7 .req r7
X8_X10 .req r8 // shared by x8 and x10
X9_X11 .req r9 // shared by x9 and x11
X12 .req r10
X13 .req r11
X14 .req r12
X15 .req r14
.Lexpand_32byte_k:
// "expand 32-byte k"
.word 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
#ifdef __thumb2__
# define adrl adr
#endif
.macro __rev out, in, t0, t1, t2
.if __LINUX_ARM_ARCH__ >= 6
rev \out, \in
.else
lsl \t0, \in, #24
and \t1, \in, #0xff00
and \t2, \in, #0xff0000
orr \out, \t0, \in, lsr #24
orr \out, \out, \t1, lsl #8
orr \out, \out, \t2, lsr #8
.endif
.endm
.macro _le32_bswap x, t0, t1, t2
#ifdef __ARMEB__
__rev \x, \x, \t0, \t1, \t2
#endif
.endm
.macro _le32_bswap_4x a, b, c, d, t0, t1, t2
_le32_bswap \a, \t0, \t1, \t2
_le32_bswap \b, \t0, \t1, \t2
_le32_bswap \c, \t0, \t1, \t2
_le32_bswap \d, \t0, \t1, \t2
.endm
.macro __ldrd a, b, src, offset
#if __LINUX_ARM_ARCH__ >= 6
ldrd \a, \b, [\src, #\offset]
#else
ldr \a, [\src, #\offset]
ldr \b, [\src, #\offset + 4]
#endif
.endm
.macro __strd a, b, dst, offset
#if __LINUX_ARM_ARCH__ >= 6
strd \a, \b, [\dst, #\offset]
#else
str \a, [\dst, #\offset]
str \b, [\dst, #\offset + 4]
#endif
.endm
.macro _halfround a1, b1, c1, d1, a2, b2, c2, d2
// a += b; d ^= a; d = rol(d, 16);
add \a1, \a1, \b1, ror #brot
add \a2, \a2, \b2, ror #brot
eor \d1, \a1, \d1, ror #drot
eor \d2, \a2, \d2, ror #drot
// drot == 32 - 16 == 16
// c += d; b ^= c; b = rol(b, 12);
add \c1, \c1, \d1, ror #16
add \c2, \c2, \d2, ror #16
eor \b1, \c1, \b1, ror #brot
eor \b2, \c2, \b2, ror #brot
// brot == 32 - 12 == 20
// a += b; d ^= a; d = rol(d, 8);
add \a1, \a1, \b1, ror #20
add \a2, \a2, \b2, ror #20
eor \d1, \a1, \d1, ror #16
eor \d2, \a2, \d2, ror #16
// drot == 32 - 8 == 24
// c += d; b ^= c; b = rol(b, 7);
add \c1, \c1, \d1, ror #24
add \c2, \c2, \d2, ror #24
eor \b1, \c1, \b1, ror #20
eor \b2, \c2, \b2, ror #20
// brot == 32 - 7 == 25
.endm
.macro _doubleround
// column round
// quarterrounds: (x0, x4, x8, x12) and (x1, x5, x9, x13)
_halfround X0, X4, X8_X10, X12, X1, X5, X9_X11, X13
// save (x8, x9); restore (x10, x11)
__strd X8_X10, X9_X11, sp, 0
__ldrd X8_X10, X9_X11, sp, 8
// quarterrounds: (x2, x6, x10, x14) and (x3, x7, x11, x15)
_halfround X2, X6, X8_X10, X14, X3, X7, X9_X11, X15
.set brot, 25
.set drot, 24
// diagonal round
// quarterrounds: (x0, x5, x10, x15) and (x1, x6, x11, x12)
_halfround X0, X5, X8_X10, X15, X1, X6, X9_X11, X12
// save (x10, x11); restore (x8, x9)
__strd X8_X10, X9_X11, sp, 8
__ldrd X8_X10, X9_X11, sp, 0
// quarterrounds: (x2, x7, x8, x13) and (x3, x4, x9, x14)
_halfround X2, X7, X8_X10, X13, X3, X4, X9_X11, X14
.endm
.macro _chacha_permute nrounds
.set brot, 0
.set drot, 0
.rept \nrounds / 2
_doubleround
.endr
.endm
.macro _chacha nrounds
.Lnext_block\@:
// Stack: unused0-unused1 x10-x11 x0-x15 OUT IN LEN
// Registers contain x0-x9,x12-x15.
// Do the core ChaCha permutation to update x0-x15.
_chacha_permute \nrounds
add sp, #8
// Stack: x10-x11 orig_x0-orig_x15 OUT IN LEN
// Registers contain x0-x9,x12-x15.
// x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
// Free up some registers (r8-r12,r14) by pushing (x8-x9,x12-x15).
push {X8_X10, X9_X11, X12, X13, X14, X15}
// Load (OUT, IN, LEN).
ldr r14, [sp, #96]
ldr r12, [sp, #100]
ldr r11, [sp, #104]
orr r10, r14, r12
// Use slow path if fewer than 64 bytes remain.
cmp r11, #64
blt .Lxor_slowpath\@
// Use slow path if IN and/or OUT isn't 4-byte aligned. Needed even on
// ARMv6+, since ldmia and stmia (used below) still require alignment.
tst r10, #3
bne .Lxor_slowpath\@
// Fast path: XOR 64 bytes of aligned data.
// Stack: x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN
// Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is OUT.
// x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
// x0-x3
__ldrd r8, r9, sp, 32
__ldrd r10, r11, sp, 40
add X0, X0, r8
add X1, X1, r9
add X2, X2, r10
add X3, X3, r11
_le32_bswap_4x X0, X1, X2, X3, r8, r9, r10
ldmia r12!, {r8-r11}
eor X0, X0, r8
eor X1, X1, r9
eor X2, X2, r10
eor X3, X3, r11
stmia r14!, {X0-X3}
// x4-x7
__ldrd r8, r9, sp, 48
__ldrd r10, r11, sp, 56
add X4, r8, X4, ror #brot
add X5, r9, X5, ror #brot
ldmia r12!, {X0-X3}
add X6, r10, X6, ror #brot
add X7, r11, X7, ror #brot
_le32_bswap_4x X4, X5, X6, X7, r8, r9, r10
eor X4, X4, X0
eor X5, X5, X1
eor X6, X6, X2
eor X7, X7, X3
stmia r14!, {X4-X7}
// x8-x15
pop {r0-r7} // (x8-x9,x12-x15,x10-x11)
__ldrd r8, r9, sp, 32
__ldrd r10, r11, sp, 40
add r0, r0, r8 // x8
add r1, r1, r9 // x9
add r6, r6, r10 // x10
add r7, r7, r11 // x11
_le32_bswap_4x r0, r1, r6, r7, r8, r9, r10
ldmia r12!, {r8-r11}
eor r0, r0, r8 // x8
eor r1, r1, r9 // x9
eor r6, r6, r10 // x10
eor r7, r7, r11 // x11
stmia r14!, {r0,r1,r6,r7}
ldmia r12!, {r0,r1,r6,r7}
__ldrd r8, r9, sp, 48
__ldrd r10, r11, sp, 56
add r2, r8, r2, ror #drot // x12
add r3, r9, r3, ror #drot // x13
add r4, r10, r4, ror #drot // x14
add r5, r11, r5, ror #drot // x15
_le32_bswap_4x r2, r3, r4, r5, r9, r10, r11
ldr r9, [sp, #72] // load LEN
eor r2, r2, r0 // x12
eor r3, r3, r1 // x13
eor r4, r4, r6 // x14
eor r5, r5, r7 // x15
subs r9, #64 // decrement and check LEN
stmia r14!, {r2-r5}
beq .Ldone\@
.Lprepare_for_next_block\@:
// Stack: x0-x15 OUT IN LEN
// Increment block counter (x12)
add r8, #1
// Store updated (OUT, IN, LEN)
str r14, [sp, #64]
str r12, [sp, #68]
str r9, [sp, #72]
mov r14, sp
// Store updated block counter (x12)
str r8, [sp, #48]
sub sp, #16
// Reload state and do next block
ldmia r14!, {r0-r11} // load x0-x11
__strd r10, r11, sp, 8 // store x10-x11 before state
ldmia r14, {r10-r12,r14} // load x12-x15
b .Lnext_block\@
.Lxor_slowpath\@:
// Slow path: < 64 bytes remaining, or unaligned input or output buffer.
// We handle it by storing the 64 bytes of keystream to the stack, then
// XOR-ing the needed portion with the data.
// Allocate keystream buffer
sub sp, #64
mov r14, sp
// Stack: ks0-ks15 x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN
// Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is &ks0.
// x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
// Save keystream for x0-x3
__ldrd r8, r9, sp, 96
__ldrd r10, r11, sp, 104
add X0, X0, r8
add X1, X1, r9
add X2, X2, r10
add X3, X3, r11
_le32_bswap_4x X0, X1, X2, X3, r8, r9, r10
stmia r14!, {X0-X3}
// Save keystream for x4-x7
__ldrd r8, r9, sp, 112
__ldrd r10, r11, sp, 120
add X4, r8, X4, ror #brot
add X5, r9, X5, ror #brot
add X6, r10, X6, ror #brot
add X7, r11, X7, ror #brot
_le32_bswap_4x X4, X5, X6, X7, r8, r9, r10
add r8, sp, #64
stmia r14!, {X4-X7}
// Save keystream for x8-x15
ldm r8, {r0-r7} // (x8-x9,x12-x15,x10-x11)
__ldrd r8, r9, sp, 128
__ldrd r10, r11, sp, 136
add r0, r0, r8 // x8
add r1, r1, r9 // x9
add r6, r6, r10 // x10
add r7, r7, r11 // x11
_le32_bswap_4x r0, r1, r6, r7, r8, r9, r10
stmia r14!, {r0,r1,r6,r7}
__ldrd r8, r9, sp, 144
__ldrd r10, r11, sp, 152
add r2, r8, r2, ror #drot // x12
add r3, r9, r3, ror #drot // x13
add r4, r10, r4, ror #drot // x14
add r5, r11, r5, ror #drot // x15
_le32_bswap_4x r2, r3, r4, r5, r9, r10, r11
stmia r14, {r2-r5}
// Stack: ks0-ks15 unused0-unused7 x0-x15 OUT IN LEN
// Registers: r8 is block counter, r12 is IN.
ldr r9, [sp, #168] // LEN
ldr r14, [sp, #160] // OUT
cmp r9, #64
mov r0, sp
movle r1, r9
movgt r1, #64
// r1 is number of bytes to XOR, in range [1, 64]
.if __LINUX_ARM_ARCH__ < 6
orr r2, r12, r14
tst r2, #3 // IN or OUT misaligned?
bne .Lxor_next_byte\@
.endif
// XOR a word at a time
.rept 16
subs r1, #4
blt .Lxor_words_done\@
ldr r2, [r12], #4
ldr r3, [r0], #4
eor r2, r2, r3
str r2, [r14], #4
.endr
b .Lxor_slowpath_done\@
.Lxor_words_done\@:
ands r1, r1, #3
beq .Lxor_slowpath_done\@
// XOR a byte at a time
.Lxor_next_byte\@:
ldrb r2, [r12], #1
ldrb r3, [r0], #1
eor r2, r2, r3
strb r2, [r14], #1
subs r1, #1
bne .Lxor_next_byte\@
.Lxor_slowpath_done\@:
subs r9, #64
add sp, #96
bgt .Lprepare_for_next_block\@
.Ldone\@:
.endm // _chacha
/*
* void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8],
* const u32 iv[4]);
*/
SYM_FUNC_START(chacha20_arm)
cmp r2, #0 // len == 0?
reteq lr
push {r0-r2,r4-r11,lr}
// Push state x0-x15 onto stack.
// Also store an extra copy of x10-x11 just before the state.
ldr r4, [sp, #48] // iv
mov r0, sp
sub sp, #80
// iv: x12-x15
ldm r4, {X12,X13,X14,X15}
stmdb r0!, {X12,X13,X14,X15}
// key: x4-x11
__ldrd X8_X10, X9_X11, r3, 24
__strd X8_X10, X9_X11, sp, 8
stmdb r0!, {X8_X10, X9_X11}
ldm r3, {X4-X9_X11}
stmdb r0!, {X4-X9_X11}
// constants: x0-x3
adrl X3, .Lexpand_32byte_k
ldm X3, {X0-X3}
__strd X0, X1, sp, 16
__strd X2, X3, sp, 24
_chacha 20
add sp, #76
pop {r4-r11, pc}
SYM_FUNC_END(chacha20_arm)
/*
* void hchacha20_arm(const u32 state[16], u32 out[8]);
*/
SYM_FUNC_START(hchacha20_arm)
push {r1,r4-r11,lr}
mov r14, r0
ldmia r14!, {r0-r11} // load x0-x11
push {r10-r11} // store x10-x11 to stack
ldm r14, {r10-r12,r14} // load x12-x15
sub sp, #8
_chacha_permute 20
// Skip over (unused0-unused1, x10-x11)
add sp, #16
// Fix up rotations of x12-x15
ror X12, X12, #drot
ror X13, X13, #drot
pop {r4} // load 'out'
ror X14, X14, #drot
ror X15, X15, #drot
// Store (x0-x3,x12-x15) to 'out'
stm r4, {X0,X1,X2,X3,X12,X13,X14,X15}
pop {r4-r11,pc}
SYM_FUNC_END(hchacha20_arm)

View File

@ -0,0 +1,132 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifdef __linux__
#include <asm/fpu/api.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/intel-family.h>
#else
#include <sys/simd-x86_64.h>
#endif
asmlinkage void hchacha20_ssse3(u32 *derived_key, const u8 *nonce,
const u8 *key);
asmlinkage void chacha20_ssse3(u8 *out, const u8 *in, const size_t len,
const u32 key[8], const u32 counter[4]);
asmlinkage void chacha20_avx2(u8 *out, const u8 *in, const size_t len,
const u32 key[8], const u32 counter[4]);
asmlinkage void chacha20_avx512(u8 *out, const u8 *in, const size_t len,
const u32 key[8], const u32 counter[4]);
asmlinkage void chacha20_avx512vl(u8 *out, const u8 *in, const size_t len,
const u32 key[8], const u32 counter[4]);
static bool chacha20_use_ssse3 __ro_after_init;
static bool chacha20_use_avx2 __ro_after_init;
static bool chacha20_use_avx512 __ro_after_init;
static bool chacha20_use_avx512vl __ro_after_init;
static bool *const chacha20_nobs[] __initconst = {
&chacha20_use_ssse3, &chacha20_use_avx2, &chacha20_use_avx512,
&chacha20_use_avx512vl };
static void __init chacha20_fpu_init(void)
{
#ifdef __linux__
chacha20_use_ssse3 = boot_cpu_has(X86_FEATURE_SSSE3);
chacha20_use_avx2 =
boot_cpu_has(X86_FEATURE_AVX) &&
boot_cpu_has(X86_FEATURE_AVX2) &&
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
#ifndef COMPAT_CANNOT_USE_AVX512
chacha20_use_avx512 =
boot_cpu_has(X86_FEATURE_AVX) &&
boot_cpu_has(X86_FEATURE_AVX2) &&
boot_cpu_has(X86_FEATURE_AVX512F) &&
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
XFEATURE_MASK_AVX512, NULL) &&
/* Skylake downclocks unacceptably much when using zmm. */
boot_cpu_data.x86_model != INTEL_FAM6_SKYLAKE_X;
chacha20_use_avx512vl =
boot_cpu_has(X86_FEATURE_AVX) &&
boot_cpu_has(X86_FEATURE_AVX2) &&
boot_cpu_has(X86_FEATURE_AVX512F) &&
boot_cpu_has(X86_FEATURE_AVX512VL) &&
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
XFEATURE_MASK_AVX512, NULL);
#endif
#else
chacha20_use_ssse3 = !!(cpu_feature2 & CPUID2_SSSE3);
chacha20_use_avx2 = !!(cpu_feature2 & CPUID2_AVX) &&
!!(cpu_stdext_feature & CPUID_STDEXT_AVX2) &&
__ymm_enabled();
chacha20_use_avx512 = chacha20_use_avx2 &&
!!(cpu_stdext_feature & CPUID_STDEXT_AVX512F) &&
__zmm_enabled();
chacha20_use_avx512vl = chacha20_use_avx512 &&
!!(cpu_stdext_feature & CPUID_STDEXT_AVX512F) &&
!!(cpu_stdext_feature & CPUID_STDEXT_AVX512VL);
#endif
if (bootverbose)
printf("ssse3: %d avx2: %d avx512: %d avx512vl: %d\n",
chacha20_use_ssse3,
chacha20_use_avx2,
chacha20_use_avx512,
chacha20_use_avx512vl);
}
static inline bool chacha20_arch(struct chacha20_ctx *ctx, u8 *dst,
const u8 *src, size_t len,
simd_context_t *simd_context)
{
/* SIMD disables preemption, so relax after processing each page. */
BUILD_BUG_ON(PAGE_SIZE < CHACHA20_BLOCK_SIZE ||
PAGE_SIZE % CHACHA20_BLOCK_SIZE);
if (!chacha20_use_ssse3) {
return false;
}
if (len <= CHACHA20_BLOCK_SIZE) {
return false;
}
if (!simd_use(simd_context)) {
return false;
}
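/* Pick the widest SIMD path the CPU supports that the remaining length
 * can keep busy: AVX-512 for >= 8 blocks, AVX-512VL/AVX2 for >= 4,
 * otherwise SSSE3. */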
for (;;) {
const size_t bytes = min_t(size_t, len, PAGE_SIZE);
if (chacha20_use_avx512 &&
len >= CHACHA20_BLOCK_SIZE * 8)
chacha20_avx512(dst, src, bytes, ctx->key, ctx->counter);
else if (chacha20_use_avx512vl &&
len >= CHACHA20_BLOCK_SIZE * 4)
chacha20_avx512vl(dst, src, bytes, ctx->key, ctx->counter);
else if (chacha20_use_avx2 &&
len >= CHACHA20_BLOCK_SIZE * 4)
chacha20_avx2(dst, src, bytes, ctx->key, ctx->counter);
else
chacha20_ssse3(dst, src, bytes, ctx->key, ctx->counter);
ctx->counter[0] += (bytes + 63) / 64;
len -= bytes;
if (!len)
break;
dst += bytes;
src += bytes;
simd_relax(simd_context);
}
return true;
}
static inline bool hchacha20_arch(u32 derived_key[CHACHA20_KEY_WORDS],
const u8 nonce[HCHACHA20_NONCE_SIZE],
const u8 key[HCHACHA20_KEY_SIZE],
simd_context_t *simd_context)
{
if (IS_ENABLED(CONFIG_AS_SSSE3) && chacha20_use_ssse3 &&
simd_use(simd_context)) {
hchacha20_ssse3(derived_key, nonce, key);
return true;
}
return false;
}

File diff suppressed because it is too large

View File

@ -0,0 +1,238 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*
* Implementation of the ChaCha20 stream cipher.
*
* Information: https://cr.yp.to/chacha.html
*/
#include <zinc/chacha20.h>
#include "../selftest/run.h"
#define IS_ENABLED_CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1
#define IS_ENABLED_CONFIG_64BIT (sizeof(void*) == 8)
void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int len)
{
int relalign = 0;
if (!IS_ENABLED_CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) {
int size = sizeof(unsigned long);
int d = (((unsigned long)dst ^ (unsigned long)src1) |
((unsigned long)dst ^ (unsigned long)src2)) &
(size - 1);
relalign = d ? 1 << ffs(d) : size;
/*
* If we care about alignment, process as many bytes as
* needed to advance dst and src to values whose alignments
* equal their relative alignment. This will allow us to
* process the remainder of the input using optimal strides.
*/
while (((unsigned long)dst & (relalign - 1)) && len > 0) {
*dst++ = *src1++ ^ *src2++;
len--;
}
}
while (IS_ENABLED_CONFIG_64BIT && len >= 8 && !(relalign & 7)) {
*(u64 *)dst = *(const u64 *)src1 ^ *(const u64 *)src2;
dst += 8;
src1 += 8;
src2 += 8;
len -= 8;
}
while (len >= 4 && !(relalign & 3)) {
*(u32 *)dst = *(const u32 *)src1 ^ *(const u32 *)src2;
dst += 4;
src1 += 4;
src2 += 4;
len -= 4;
}
while (len >= 2 && !(relalign & 1)) {
*(u16 *)dst = *(const u16 *)src1 ^ *(const u16 *)src2;
dst += 2;
src1 += 2;
src2 += 2;
len -= 2;
}
while (len--)
*dst++ = *src1++ ^ *src2++;
}
#if defined(CONFIG_ZINC_ARCH_X86_64)
#include "chacha20-x86_64-glue.c"
#elif defined(CONFIG_ZINC_ARCH_ARM) || defined(CONFIG_ZINC_ARCH_ARM64)
#include "chacha20-arm-glue.c"
#elif defined(CONFIG_ZINC_ARCH_MIPS)
#include "chacha20-mips-glue.c"
#else
static bool *const chacha20_nobs[] __initconst = { };
static void __init chacha20_fpu_init(void)
{
}
static inline bool chacha20_arch(struct chacha20_ctx *ctx, u8 *dst,
const u8 *src, size_t len,
simd_context_t *simd_context)
{
return false;
}
static inline bool hchacha20_arch(u32 derived_key[CHACHA20_KEY_WORDS],
const u8 nonce[HCHACHA20_NONCE_SIZE],
const u8 key[HCHACHA20_KEY_SIZE],
simd_context_t *simd_context)
{
return false;
}
#endif
#define QUARTER_ROUND(x, a, b, c, d) ( \
x[a] += x[b], \
x[d] = rol32((x[d] ^ x[a]), 16), \
x[c] += x[d], \
x[b] = rol32((x[b] ^ x[c]), 12), \
x[a] += x[b], \
x[d] = rol32((x[d] ^ x[a]), 8), \
x[c] += x[d], \
x[b] = rol32((x[b] ^ x[c]), 7) \
)
#define C(i, j) (i * 4 + j)
#define DOUBLE_ROUND(x) ( \
/* Column Round */ \
QUARTER_ROUND(x, C(0, 0), C(1, 0), C(2, 0), C(3, 0)), \
QUARTER_ROUND(x, C(0, 1), C(1, 1), C(2, 1), C(3, 1)), \
QUARTER_ROUND(x, C(0, 2), C(1, 2), C(2, 2), C(3, 2)), \
QUARTER_ROUND(x, C(0, 3), C(1, 3), C(2, 3), C(3, 3)), \
/* Diagonal Round */ \
QUARTER_ROUND(x, C(0, 0), C(1, 1), C(2, 2), C(3, 3)), \
QUARTER_ROUND(x, C(0, 1), C(1, 2), C(2, 3), C(3, 0)), \
QUARTER_ROUND(x, C(0, 2), C(1, 3), C(2, 0), C(3, 1)), \
QUARTER_ROUND(x, C(0, 3), C(1, 0), C(2, 1), C(3, 2)) \
)
#define TWENTY_ROUNDS(x) ( \
DOUBLE_ROUND(x), \
DOUBLE_ROUND(x), \
DOUBLE_ROUND(x), \
DOUBLE_ROUND(x), \
DOUBLE_ROUND(x), \
DOUBLE_ROUND(x), \
DOUBLE_ROUND(x), \
DOUBLE_ROUND(x), \
DOUBLE_ROUND(x), \
DOUBLE_ROUND(x) \
)
static void chacha20_block_generic(struct chacha20_ctx *ctx, __le32 *stream)
{
u32 x[CHACHA20_BLOCK_WORDS];
int i;
for (i = 0; i < ARRAY_SIZE(x); ++i)
x[i] = ctx->state[i];
TWENTY_ROUNDS(x);
for (i = 0; i < ARRAY_SIZE(x); ++i)
stream[i] = cpu_to_le32(x[i] + ctx->state[i]);
ctx->counter[0] += 1;
}
static void chacha20_generic(struct chacha20_ctx *ctx, u8 *out, const u8 *in,
u32 len)
{
__le32 buf[CHACHA20_BLOCK_WORDS];
while (len >= CHACHA20_BLOCK_SIZE) {
chacha20_block_generic(ctx, buf);
crypto_xor_cpy(out, in, (u8 *)buf, CHACHA20_BLOCK_SIZE);
len -= CHACHA20_BLOCK_SIZE;
out += CHACHA20_BLOCK_SIZE;
in += CHACHA20_BLOCK_SIZE;
}
if (len) {
chacha20_block_generic(ctx, buf);
crypto_xor_cpy(out, in, (u8 *)buf, len);
}
}
void chacha20(struct chacha20_ctx *ctx, u8 *dst, const u8 *src, u32 len,
simd_context_t *simd_context)
{
if (!chacha20_arch(ctx, dst, src, len, simd_context))
chacha20_generic(ctx, dst, src, len);
}
EXPORT_SYMBOL(chacha20);
static void hchacha20_generic(u32 derived_key[CHACHA20_KEY_WORDS],
const u8 nonce[HCHACHA20_NONCE_SIZE],
const u8 key[HCHACHA20_KEY_SIZE])
{
u32 x[] = { CHACHA20_CONSTANT_EXPA,
CHACHA20_CONSTANT_ND_3,
CHACHA20_CONSTANT_2_BY,
CHACHA20_CONSTANT_TE_K,
get_unaligned_le32(key + 0),
get_unaligned_le32(key + 4),
get_unaligned_le32(key + 8),
get_unaligned_le32(key + 12),
get_unaligned_le32(key + 16),
get_unaligned_le32(key + 20),
get_unaligned_le32(key + 24),
get_unaligned_le32(key + 28),
get_unaligned_le32(nonce + 0),
get_unaligned_le32(nonce + 4),
get_unaligned_le32(nonce + 8),
get_unaligned_le32(nonce + 12)
};
TWENTY_ROUNDS(x);
memcpy(derived_key + 0, x + 0, sizeof(u32) * 4);
memcpy(derived_key + 4, x + 12, sizeof(u32) * 4);
}
/* Derived key should be 32-bit aligned */
void hchacha20(u32 derived_key[CHACHA20_KEY_WORDS],
const u8 nonce[HCHACHA20_NONCE_SIZE],
const u8 key[HCHACHA20_KEY_SIZE], simd_context_t *simd_context)
{
if (!hchacha20_arch(derived_key, nonce, key, simd_context))
hchacha20_generic(derived_key, nonce, key);
}
EXPORT_SYMBOL(hchacha20);
#include "../selftest/chacha20.c"
static bool nosimd __initdata = false;
#ifndef COMPAT_ZINC_IS_A_MODULE
int __init chacha20_mod_init(void)
#else
static int __init mod_init(void)
#endif
{
if (!nosimd)
chacha20_fpu_init();
if (!selftest_run("chacha20", chacha20_selftest, chacha20_nobs,
ARRAY_SIZE(chacha20_nobs)))
return -ENOTRECOVERABLE;
return 0;
}
#ifdef COMPAT_ZINC_IS_A_MODULE
static void __exit mod_exit(void)
{
}
module_init(mod_init);
module_exit(mod_exit);
#endif

View File

@ -0,0 +1,196 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*
* This is an implementation of the ChaCha20Poly1305 AEAD construction.
*
* Information: https://tools.ietf.org/html/rfc8439
*/
#include <sys/support.h>
#include <zinc/chacha20poly1305.h>
#include <zinc/chacha20.h>
#include <zinc/poly1305.h>
#include "selftest/run.h"
static const u8 pad0[CHACHA20_BLOCK_SIZE] = { 0 };
static inline void
__chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 *ad, const size_t ad_len, const u64 nonce,
const u8 key[CHACHA20POLY1305_KEY_SIZE],
simd_context_t *simd_context)
{
struct poly1305_ctx poly1305_state;
struct chacha20_ctx chacha20_state;
union {
u8 block0[POLY1305_KEY_SIZE];
__le64 lens[2];
} b = { { 0 } };
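/*
 * RFC 8439 layout: the first ChaCha20 keystream block (counter 0) yields
 * the 32-byte one-time Poly1305 key; payload encryption starts at block 1.
 */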
chacha20_init(&chacha20_state, key, nonce);
chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0),
simd_context);
poly1305_init(&poly1305_state, b.block0);
poly1305_update(&poly1305_state, ad, ad_len, simd_context);
poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf,
simd_context);
chacha20(&chacha20_state, dst, src, src_len, simd_context);
poly1305_update(&poly1305_state, dst, src_len, simd_context);
poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf,
simd_context);
b.lens[0] = cpu_to_le64(ad_len);
b.lens[1] = cpu_to_le64(src_len);
poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens),
simd_context);
poly1305_final(&poly1305_state, dst + src_len, simd_context);
memzero_explicit(&chacha20_state, sizeof(chacha20_state));
memzero_explicit(&b, sizeof(b));
}
void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 *ad, const size_t ad_len,
const u64 nonce,
const u8 key[CHACHA20POLY1305_KEY_SIZE])
{
simd_context_t simd_context;
simd_get(&simd_context);
__chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, nonce, key,
&simd_context);
simd_put(&simd_context);
}
EXPORT_SYMBOL(chacha20poly1305_encrypt);
static inline bool
__chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 *ad, const size_t ad_len, const u64 nonce,
const u8 key[CHACHA20POLY1305_KEY_SIZE],
simd_context_t *simd_context)
{
struct poly1305_ctx poly1305_state;
struct chacha20_ctx chacha20_state;
int ret;
size_t dst_len;
union {
u8 block0[POLY1305_KEY_SIZE];
u8 mac[POLY1305_MAC_SIZE];
__le64 lens[2];
} b = { { 0 } };
if (unlikely(src_len < POLY1305_MAC_SIZE)) {
printf("src_len too short\n");
return false;
}
chacha20_init(&chacha20_state, key, nonce);
chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0),
simd_context);
poly1305_init(&poly1305_state, b.block0);
poly1305_update(&poly1305_state, ad, ad_len, simd_context);
poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf,
simd_context);
dst_len = src_len - POLY1305_MAC_SIZE;
poly1305_update(&poly1305_state, src, dst_len, simd_context);
poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf,
simd_context);
b.lens[0] = cpu_to_le64(ad_len);
b.lens[1] = cpu_to_le64(dst_len);
poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens),
simd_context);
poly1305_final(&poly1305_state, b.mac, simd_context);
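/* crypto_memneq compares the tags in constant time; plaintext is only
 * produced when the tag verifies. */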
ret = crypto_memneq(b.mac, src + dst_len, POLY1305_MAC_SIZE);
if (likely(!ret))
chacha20(&chacha20_state, dst, src, dst_len, simd_context);
else {
printf("calculated: %16D\n", b.mac, "");
printf("sent : %16D\n", src + dst_len, "");
}
memzero_explicit(&chacha20_state, sizeof(chacha20_state));
memzero_explicit(&b, sizeof(b));
return !ret;
}
bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 *ad, const size_t ad_len,
const u64 nonce,
const u8 key[CHACHA20POLY1305_KEY_SIZE])
{
simd_context_t simd_context;
bool ret;
simd_get(&simd_context);
ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, nonce,
key, &simd_context);
simd_put(&simd_context);
return ret;
}
EXPORT_SYMBOL(chacha20poly1305_decrypt);
void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 *ad, const size_t ad_len,
const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
const u8 key[CHACHA20POLY1305_KEY_SIZE])
{
simd_context_t simd_context;
u32 derived_key[CHACHA20_KEY_WORDS] __aligned(16);
simd_get(&simd_context);
hchacha20(derived_key, nonce, key, &simd_context);
cpu_to_le32_array(derived_key, ARRAY_SIZE(derived_key));
__chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len,
get_unaligned_le64(nonce + 16),
(u8 *)derived_key, &simd_context);
memzero_explicit(derived_key, CHACHA20POLY1305_KEY_SIZE);
simd_put(&simd_context);
}
EXPORT_SYMBOL(xchacha20poly1305_encrypt);
bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 *ad, const size_t ad_len,
const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
const u8 key[CHACHA20POLY1305_KEY_SIZE])
{
bool ret;
simd_context_t simd_context;
u32 derived_key[CHACHA20_KEY_WORDS] __aligned(16);
simd_get(&simd_context);
hchacha20(derived_key, nonce, key, &simd_context);
cpu_to_le32_array(derived_key, ARRAY_SIZE(derived_key));
ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len,
get_unaligned_le64(nonce + 16),
(u8 *)derived_key, &simd_context);
memzero_explicit(derived_key, CHACHA20POLY1305_KEY_SIZE);
simd_put(&simd_context);
return ret;
}
EXPORT_SYMBOL(xchacha20poly1305_decrypt);
#include "selftest/chacha20poly1305.c"
static int __init mod_init(void)
{
if (!selftest_run("chacha20poly1305", chacha20poly1305_selftest,
NULL, 0))
return -ENOTRECOVERABLE;
return 0;
}
static void __exit mod_exit(void)
{
}
module_init(mod_init);
module_exit(mod_exit);

View File

@ -0,0 +1,140 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include <asm/hwcap.h>
#include <asm/neon.h>
asmlinkage void poly1305_init_arm(void *ctx, const u8 key[16]);
asmlinkage void poly1305_blocks_arm(void *ctx, const u8 *inp, const size_t len,
const u32 padbit);
asmlinkage void poly1305_emit_arm(void *ctx, u8 mac[16], const u32 nonce[4]);
asmlinkage void poly1305_blocks_neon(void *ctx, const u8 *inp, const size_t len,
const u32 padbit);
asmlinkage void poly1305_emit_neon(void *ctx, u8 mac[16], const u32 nonce[4]);
static bool poly1305_use_neon __ro_after_init;
static bool *const poly1305_nobs[] __initconst = { &poly1305_use_neon };
static void __init poly1305_fpu_init(void)
{
#if defined(CONFIG_ZINC_ARCH_ARM64)
poly1305_use_neon = cpu_have_named_feature(ASIMD);
#elif defined(CONFIG_ZINC_ARCH_ARM)
poly1305_use_neon = elf_hwcap & HWCAP_NEON;
#endif
}
#if defined(CONFIG_ZINC_ARCH_ARM64)
struct poly1305_arch_internal {
union {
u32 h[5];
struct {
u64 h0, h1, h2;
};
};
u64 is_base2_26;
u64 r[2];
};
#elif defined(CONFIG_ZINC_ARCH_ARM)
struct poly1305_arch_internal {
union {
u32 h[5];
struct {
u64 h0, h1;
u32 h2;
} __packed;
};
u32 r[4];
u32 is_base2_26;
};
#endif
/* The NEON code uses base 2^26, while the scalar code uses base 2^64 on 64-bit
* and base 2^32 on 32-bit. If we hit the unfortunate situation of using NEON
* and then having to go back to scalar -- because the user is silly and has
* called the update function from two separate contexts -- then we need to
* convert back to the original base before proceeding. The below function is
* written for 64-bit integers, and so we have to swap words at the end on
* big-endian 32-bit. It is possible to reason that the initial reduction below
* is sufficient given the implementation invariants. However, for an avoidance
* of doubt and because this is not performance critical, we do the full
* reduction anyway.
*/
static void convert_to_base2_64(void *ctx)
{
struct poly1305_arch_internal *state = ctx;
u32 cy;
if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !state->is_base2_26)
return;
cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy;
cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy;
cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy;
cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy;
state->h0 = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0];
state->h1 = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12);
state->h2 = state->h[4] >> 24;
if (IS_ENABLED(CONFIG_ZINC_ARCH_ARM) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) {
state->h0 = rol64(state->h0, 32);
state->h1 = rol64(state->h1, 32);
}
#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
cy = (state->h2 >> 2) + (state->h2 & ~3ULL);
state->h2 &= 3;
state->h0 += cy;
state->h1 += (cy = ULT(state->h0, cy));
state->h2 += ULT(state->h1, cy);
#undef ULT
state->is_base2_26 = 0;
}
static inline bool poly1305_init_arch(void *ctx,
const u8 key[POLY1305_KEY_SIZE])
{
poly1305_init_arm(ctx, key);
return true;
}
static inline bool poly1305_blocks_arch(void *ctx, const u8 *inp,
size_t len, const u32 padbit,
simd_context_t *simd_context)
{
/* SIMD disables preemption, so relax after processing each page. */
BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE ||
PAGE_SIZE % POLY1305_BLOCK_SIZE);
if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !poly1305_use_neon ||
!simd_use(simd_context)) {
convert_to_base2_64(ctx);
poly1305_blocks_arm(ctx, inp, len, padbit);
return true;
}
for (;;) {
const size_t bytes = min_t(size_t, len, PAGE_SIZE);
poly1305_blocks_neon(ctx, inp, bytes, padbit);
len -= bytes;
if (!len)
break;
inp += bytes;
simd_relax(simd_context);
}
return true;
}
static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
const u32 nonce[4],
simd_context_t *simd_context)
{
if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !poly1305_use_neon ||
!simd_use(simd_context)) {
convert_to_base2_64(ctx);
poly1305_emit_arm(ctx, mac, nonce);
} else
poly1305_emit_neon(ctx, mac, nonce);
return true;
}
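To make the limb regrouping in convert_to_base2_64() above concrete, here is a
minimal standalone sketch (not part of the commit; plain C99 with illustrative
values) that packs five base-2^26 limbs of a 130-bit accumulator into the
64-bit words the scalar code expects:

/* h = h[0] + h[1]*2^26 + h[2]*2^52 + h[3]*2^78 + h[4]*2^104, regrouped
 * into h0 (bits 0..63), h1 (bits 64..127) and h2 (bits 128..129) with
 * the same shifts as convert_to_base2_64(). */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t h[5] = { 0x3ffffff, 1, 2, 3, 4 };	/* base 2^26 limbs */
	uint64_t h0, h1, h2;

	h0 = ((uint64_t)h[2] << 52) | ((uint64_t)h[1] << 26) | h[0];
	h1 = ((uint64_t)h[4] << 40) | ((uint64_t)h[3] << 14) | (h[2] >> 12);
	h2 = h[4] >> 24;
	printf("h2:h1:h0 = %x:%016jx:%016jx\n", (unsigned)h2,
	    (uintmax_t)h1, (uintmax_t)h0);
	return 0;
}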

File diff suppressed because it is too large

View File

@ -0,0 +1,974 @@
#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
#
# This code is taken from the OpenSSL project but the author, Andy Polyakov,
# has relicensed it under the licenses specified in the SPDX header above.
# The original headers, including the original license headers, are
# included below for completeness.
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements Poly1305 hash for ARMv8.
#
# June 2015
#
# Numbers are cycles per processed byte with poly1305_blocks alone.
#
# IALU/gcc-4.9 NEON
#
# Apple A7 1.86/+5% 0.72
# Cortex-A53 2.69/+58% 1.47
# Cortex-A57 2.70/+7% 1.14
# Denver 1.64/+50% 1.18(*)
# X-Gene 2.13/+68% 2.27
# Mongoose 1.77/+75% 1.12
# Kryo 2.70/+55% 1.13
#
# (*)	estimate based on resource availability is less than 1.0,
#	i.e. the measured result is worse than expected, presumably
#	because the binary translator is not almighty;
$flavour=shift;
if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
if ($flavour && $flavour ne "void") {
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";
open STDOUT,"| \"$^X\" $xlate $flavour $output";
} else {
open STDOUT,">$output";
}
my ($ctx,$inp,$len,$padbit) = map("x$_",(0..3));
my ($mac,$nonce)=($inp,$len);
my ($h0,$h1,$h2,$r0,$r1,$s1,$t0,$t1,$d0,$d1,$d2) = map("x$_",(4..14));
$code.=<<___;
#ifndef __KERNEL__
# include "arm_arch.h"
.extern OPENSSL_armcap_P
#else
# define poly1305_init poly1305_init_arm
# define poly1305_blocks poly1305_blocks_arm
# define poly1305_emit poly1305_emit_arm
#endif
.text
// forward "declarations" are required for Apple
.globl poly1305_blocks
.globl poly1305_emit
.globl poly1305_init
.type poly1305_init,%function
.align 5
poly1305_init:
cmp $inp,xzr
stp xzr,xzr,[$ctx] // zero hash value
stp xzr,xzr,[$ctx,#16] // [along with is_base2_26]
csel x0,xzr,x0,eq
b.eq .Lno_key
#ifndef __KERNEL__
# ifdef __ILP32__
ldrsw $t1,.LOPENSSL_armcap_P
# else
ldr $t1,.LOPENSSL_armcap_P
# endif
adr $t0,.LOPENSSL_armcap_P
ldr w17,[$t0,$t1]
#endif
ldp $r0,$r1,[$inp] // load key
mov $s1,#0xfffffffc0fffffff
movk $s1,#0x0fff,lsl#48
#ifdef __AARCH64EB__
rev $r0,$r0 // flip bytes
rev $r1,$r1
#endif
and $r0,$r0,$s1 // &=0ffffffc0fffffff
and $s1,$s1,#-4
and $r1,$r1,$s1 // &=0ffffffc0ffffffc
stp $r0,$r1,[$ctx,#32] // save key value
#ifndef __KERNEL__
tst w17,#ARMV7_NEON
adr $d0,poly1305_blocks
adr $r0,poly1305_blocks_neon
adr $d1,poly1305_emit
adr $r1,poly1305_emit_neon
csel $d0,$d0,$r0,eq
csel $d1,$d1,$r1,eq
# ifdef __ILP32__
stp w12,w13,[$len]
# else
stp $d0,$d1,[$len]
# endif
mov x0,#1
#else
mov x0,#0
#endif
.Lno_key:
ret
.size poly1305_init,.-poly1305_init
.type poly1305_blocks,%function
.align 5
poly1305_blocks:
ands $len,$len,#-16
b.eq .Lno_data
ldp $h0,$h1,[$ctx] // load hash value
ldp $r0,$r1,[$ctx,#32] // load key value
ldr $h2,[$ctx,#16]
add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
b .Loop
.align 5
.Loop:
ldp $t0,$t1,[$inp],#16 // load input
sub $len,$len,#16
#ifdef __AARCH64EB__
rev $t0,$t0
rev $t1,$t1
#endif
adds $h0,$h0,$t0 // accumulate input
adcs $h1,$h1,$t1
mul $d0,$h0,$r0 // h0*r0
adc $h2,$h2,$padbit
umulh $d1,$h0,$r0
mul $t0,$h1,$s1 // h1*5*r1
umulh $t1,$h1,$s1
adds $d0,$d0,$t0
mul $t0,$h0,$r1 // h0*r1
adc $d1,$d1,$t1
umulh $d2,$h0,$r1
adds $d1,$d1,$t0
mul $t0,$h1,$r0 // h1*r0
adc $d2,$d2,xzr
umulh $t1,$h1,$r0
adds $d1,$d1,$t0
mul $t0,$h2,$s1 // h2*5*r1
adc $d2,$d2,$t1
mul $t1,$h2,$r0 // h2*r0
adds $d1,$d1,$t0
adc $d2,$d2,$t1
and $t0,$d2,#-4 // final reduction
and $h2,$d2,#3
add $t0,$t0,$d2,lsr#2
adds $h0,$d0,$t0
adcs $h1,$d1,xzr
adc $h2,$h2,xzr
cbnz $len,.Loop
stp $h0,$h1,[$ctx] // store hash value
str $h2,[$ctx,#16]
.Lno_data:
ret
.size poly1305_blocks,.-poly1305_blocks
.type poly1305_emit,%function
.align 5
poly1305_emit:
ldp $h0,$h1,[$ctx] // load hash base 2^64
ldr $h2,[$ctx,#16]
ldp $t0,$t1,[$nonce] // load nonce
adds $d0,$h0,#5 // compare to modulus
adcs $d1,$h1,xzr
adc $d2,$h2,xzr
tst $d2,#-4 // see if it's carried/borrowed
csel $h0,$h0,$d0,eq
csel $h1,$h1,$d1,eq
#ifdef __AARCH64EB__
ror $t0,$t0,#32 // flip nonce words
ror $t1,$t1,#32
#endif
adds $h0,$h0,$t0 // accumulate nonce
adc $h1,$h1,$t1
#ifdef __AARCH64EB__
rev $h0,$h0 // flip output bytes
rev $h1,$h1
#endif
stp $h0,$h1,[$mac] // write result
ret
.size poly1305_emit,.-poly1305_emit
___
my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("v$_.4s",(0..8));
my ($IN01_0,$IN01_1,$IN01_2,$IN01_3,$IN01_4) = map("v$_.2s",(9..13));
my ($IN23_0,$IN23_1,$IN23_2,$IN23_3,$IN23_4) = map("v$_.2s",(14..18));
my ($ACC0,$ACC1,$ACC2,$ACC3,$ACC4) = map("v$_.2d",(19..23));
my ($H0,$H1,$H2,$H3,$H4) = map("v$_.2s",(24..28));
my ($T0,$T1,$MASK) = map("v$_",(29..31));
my ($in2,$zeros)=("x16","x17");
my $is_base2_26 = $zeros; # borrow
$code.=<<___;
.type __poly1305_mult,%function
.align 5
__poly1305_mult:
mul $d0,$h0,$r0 // h0*r0
umulh $d1,$h0,$r0
mul $t0,$h1,$s1 // h1*5*r1
umulh $t1,$h1,$s1
adds $d0,$d0,$t0
mul $t0,$h0,$r1 // h0*r1
adc $d1,$d1,$t1
umulh $d2,$h0,$r1
adds $d1,$d1,$t0
mul $t0,$h1,$r0 // h1*r0
adc $d2,$d2,xzr
umulh $t1,$h1,$r0
adds $d1,$d1,$t0
mul $t0,$h2,$s1 // h2*5*r1
adc $d2,$d2,$t1
mul $t1,$h2,$r0 // h2*r0
adds $d1,$d1,$t0
adc $d2,$d2,$t1
and $t0,$d2,#-4 // final reduction
and $h2,$d2,#3
add $t0,$t0,$d2,lsr#2
adds $h0,$d0,$t0
adcs $h1,$d1,xzr
adc $h2,$h2,xzr
ret
.size __poly1305_mult,.-__poly1305_mult
.type __poly1305_splat,%function
.align 5
__poly1305_splat:
and x12,$h0,#0x03ffffff // base 2^64 -> base 2^26
ubfx x13,$h0,#26,#26
extr x14,$h1,$h0,#52
and x14,x14,#0x03ffffff
ubfx x15,$h1,#14,#26
extr x16,$h2,$h1,#40
str w12,[$ctx,#16*0] // r0
add w12,w13,w13,lsl#2 // r1*5
str w13,[$ctx,#16*1] // r1
add w13,w14,w14,lsl#2 // r2*5
str w12,[$ctx,#16*2] // s1
str w14,[$ctx,#16*3] // r2
add w14,w15,w15,lsl#2 // r3*5
str w13,[$ctx,#16*4] // s2
str w15,[$ctx,#16*5] // r3
add w15,w16,w16,lsl#2 // r4*5
str w14,[$ctx,#16*6] // s3
str w16,[$ctx,#16*7] // r4
str w15,[$ctx,#16*8] // s4
ret
.size __poly1305_splat,.-__poly1305_splat
#if !defined(__KERNEL__) || defined(CONFIG_KERNEL_MODE_NEON)
#ifdef __KERNEL__
.globl poly1305_blocks_neon
.globl poly1305_emit_neon
#endif
.type poly1305_blocks_neon,%function
.align 5
poly1305_blocks_neon:
ldr $is_base2_26,[$ctx,#24]
cmp $len,#128
b.hs .Lblocks_neon
cbz $is_base2_26,poly1305_blocks
.Lblocks_neon:
stp x29,x30,[sp,#-80]!
add x29,sp,#0
ands $len,$len,#-16
b.eq .Lno_data_neon
cbz $is_base2_26,.Lbase2_64_neon
ldp w10,w11,[$ctx] // load hash value base 2^26
ldp w12,w13,[$ctx,#8]
ldr w14,[$ctx,#16]
tst $len,#31
b.eq .Leven_neon
ldp $r0,$r1,[$ctx,#32] // load key value
add $h0,x10,x11,lsl#26 // base 2^26 -> base 2^64
lsr $h1,x12,#12
adds $h0,$h0,x12,lsl#52
add $h1,$h1,x13,lsl#14
adc $h1,$h1,xzr
lsr $h2,x14,#24
adds $h1,$h1,x14,lsl#40
adc $d2,$h2,xzr // can be partially reduced...
ldp $d0,$d1,[$inp],#16 // load input
sub $len,$len,#16
add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
and $t0,$d2,#-4 // ... so reduce
and $h2,$d2,#3
add $t0,$t0,$d2,lsr#2
adds $h0,$h0,$t0
adcs $h1,$h1,xzr
adc $h2,$h2,xzr
#ifdef __AARCH64EB__
rev $d0,$d0
rev $d1,$d1
#endif
adds $h0,$h0,$d0 // accumulate input
adcs $h1,$h1,$d1
adc $h2,$h2,$padbit
bl __poly1305_mult
ldr x30,[sp,#8]
cbz $padbit,.Lstore_base2_64_neon
and x10,$h0,#0x03ffffff // base 2^64 -> base 2^26
ubfx x11,$h0,#26,#26
extr x12,$h1,$h0,#52
and x12,x12,#0x03ffffff
ubfx x13,$h1,#14,#26
extr x14,$h2,$h1,#40
cbnz $len,.Leven_neon
stp w10,w11,[$ctx] // store hash value base 2^26
stp w12,w13,[$ctx,#8]
str w14,[$ctx,#16]
b .Lno_data_neon
.align 4
.Lstore_base2_64_neon:
stp $h0,$h1,[$ctx] // store hash value base 2^64
stp $h2,xzr,[$ctx,#16] // note that is_base2_26 is zeroed
b .Lno_data_neon
.align 4
.Lbase2_64_neon:
ldp $r0,$r1,[$ctx,#32] // load key value
ldp $h0,$h1,[$ctx] // load hash value base 2^64
ldr $h2,[$ctx,#16]
tst $len,#31
b.eq .Linit_neon
ldp $d0,$d1,[$inp],#16 // load input
sub $len,$len,#16
add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
#ifdef __AARCH64EB__
rev $d0,$d0
rev $d1,$d1
#endif
adds $h0,$h0,$d0 // accumulate input
adcs $h1,$h1,$d1
adc $h2,$h2,$padbit
bl __poly1305_mult
.Linit_neon:
and x10,$h0,#0x03ffffff // base 2^64 -> base 2^26
ubfx x11,$h0,#26,#26
extr x12,$h1,$h0,#52
and x12,x12,#0x03ffffff
ubfx x13,$h1,#14,#26
extr x14,$h2,$h1,#40
stp d8,d9,[sp,#16] // meet ABI requirements
stp d10,d11,[sp,#32]
stp d12,d13,[sp,#48]
stp d14,d15,[sp,#64]
fmov ${H0},x10
fmov ${H1},x11
fmov ${H2},x12
fmov ${H3},x13
fmov ${H4},x14
////////////////////////////////// initialize r^n table
mov $h0,$r0 // r^1
add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
mov $h1,$r1
mov $h2,xzr
add $ctx,$ctx,#48+12
bl __poly1305_splat
bl __poly1305_mult // r^2
sub $ctx,$ctx,#4
bl __poly1305_splat
bl __poly1305_mult // r^3
sub $ctx,$ctx,#4
bl __poly1305_splat
bl __poly1305_mult // r^4
sub $ctx,$ctx,#4
bl __poly1305_splat
ldr x30,[sp,#8]
add $in2,$inp,#32
adr $zeros,.Lzeros
subs $len,$len,#64
csel $in2,$zeros,$in2,lo
mov x4,#1
str x4,[$ctx,#-24] // set is_base2_26
sub $ctx,$ctx,#48 // restore original $ctx
b .Ldo_neon
.align 4
.Leven_neon:
add $in2,$inp,#32
adr $zeros,.Lzeros
subs $len,$len,#64
csel $in2,$zeros,$in2,lo
stp d8,d9,[sp,#16] // meet ABI requirements
stp d10,d11,[sp,#32]
stp d12,d13,[sp,#48]
stp d14,d15,[sp,#64]
fmov ${H0},x10
fmov ${H1},x11
fmov ${H2},x12
fmov ${H3},x13
fmov ${H4},x14
.Ldo_neon:
ldp x8,x12,[$in2],#16 // inp[2:3] (or zero)
ldp x9,x13,[$in2],#48
lsl $padbit,$padbit,#24
add x15,$ctx,#48
#ifdef __AARCH64EB__
rev x8,x8
rev x12,x12
rev x9,x9
rev x13,x13
#endif
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
and x5,x9,#0x03ffffff
ubfx x6,x8,#26,#26
ubfx x7,x9,#26,#26
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
extr x8,x12,x8,#52
extr x9,x13,x9,#52
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
fmov $IN23_0,x4
and x8,x8,#0x03ffffff
and x9,x9,#0x03ffffff
ubfx x10,x12,#14,#26
ubfx x11,x13,#14,#26
add x12,$padbit,x12,lsr#40
add x13,$padbit,x13,lsr#40
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
fmov $IN23_1,x6
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
fmov $IN23_2,x8
fmov $IN23_3,x10
fmov $IN23_4,x12
ldp x8,x12,[$inp],#16 // inp[0:1]
ldp x9,x13,[$inp],#48
ld1 {$R0,$R1,$S1,$R2},[x15],#64
ld1 {$S2,$R3,$S3,$R4},[x15],#64
ld1 {$S4},[x15]
#ifdef __AARCH64EB__
rev x8,x8
rev x12,x12
rev x9,x9
rev x13,x13
#endif
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
and x5,x9,#0x03ffffff
ubfx x6,x8,#26,#26
ubfx x7,x9,#26,#26
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
extr x8,x12,x8,#52
extr x9,x13,x9,#52
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
fmov $IN01_0,x4
and x8,x8,#0x03ffffff
and x9,x9,#0x03ffffff
ubfx x10,x12,#14,#26
ubfx x11,x13,#14,#26
add x12,$padbit,x12,lsr#40
add x13,$padbit,x13,lsr#40
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
fmov $IN01_1,x6
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
movi $MASK.2d,#-1
fmov $IN01_2,x8
fmov $IN01_3,x10
fmov $IN01_4,x12
ushr $MASK.2d,$MASK.2d,#38
b.ls .Lskip_loop
.align 4
.Loop_neon:
////////////////////////////////////////////////////////////////
// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
// \___________________/
// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
// \___________________/ \____________________/
//
// Note that we start with inp[2:3]*r^2. This is because it
// doesn't depend on reduction in previous iteration.
////////////////////////////////////////////////////////////////
// d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0
// d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4
// d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3
// d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2
// d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1
subs $len,$len,#64
umull $ACC4,$IN23_0,${R4}[2]
csel $in2,$zeros,$in2,lo
umull $ACC3,$IN23_0,${R3}[2]
umull $ACC2,$IN23_0,${R2}[2]
ldp x8,x12,[$in2],#16 // inp[2:3] (or zero)
umull $ACC1,$IN23_0,${R1}[2]
ldp x9,x13,[$in2],#48
umull $ACC0,$IN23_0,${R0}[2]
#ifdef __AARCH64EB__
rev x8,x8
rev x12,x12
rev x9,x9
rev x13,x13
#endif
umlal $ACC4,$IN23_1,${R3}[2]
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
umlal $ACC3,$IN23_1,${R2}[2]
and x5,x9,#0x03ffffff
umlal $ACC2,$IN23_1,${R1}[2]
ubfx x6,x8,#26,#26
umlal $ACC1,$IN23_1,${R0}[2]
ubfx x7,x9,#26,#26
umlal $ACC0,$IN23_1,${S4}[2]
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
umlal $ACC4,$IN23_2,${R2}[2]
extr x8,x12,x8,#52
umlal $ACC3,$IN23_2,${R1}[2]
extr x9,x13,x9,#52
umlal $ACC2,$IN23_2,${R0}[2]
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
umlal $ACC1,$IN23_2,${S4}[2]
fmov $IN23_0,x4
umlal $ACC0,$IN23_2,${S3}[2]
and x8,x8,#0x03ffffff
umlal $ACC4,$IN23_3,${R1}[2]
and x9,x9,#0x03ffffff
umlal $ACC3,$IN23_3,${R0}[2]
ubfx x10,x12,#14,#26
umlal $ACC2,$IN23_3,${S4}[2]
ubfx x11,x13,#14,#26
umlal $ACC1,$IN23_3,${S3}[2]
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
umlal $ACC0,$IN23_3,${S2}[2]
fmov $IN23_1,x6
add $IN01_2,$IN01_2,$H2
add x12,$padbit,x12,lsr#40
umlal $ACC4,$IN23_4,${R0}[2]
add x13,$padbit,x13,lsr#40
umlal $ACC3,$IN23_4,${S4}[2]
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
umlal $ACC2,$IN23_4,${S3}[2]
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
umlal $ACC1,$IN23_4,${S2}[2]
fmov $IN23_2,x8
umlal $ACC0,$IN23_4,${S1}[2]
fmov $IN23_3,x10
////////////////////////////////////////////////////////////////
// (hash+inp[0:1])*r^4 and accumulate
add $IN01_0,$IN01_0,$H0
fmov $IN23_4,x12
umlal $ACC3,$IN01_2,${R1}[0]
ldp x8,x12,[$inp],#16 // inp[0:1]
umlal $ACC0,$IN01_2,${S3}[0]
ldp x9,x13,[$inp],#48
umlal $ACC4,$IN01_2,${R2}[0]
umlal $ACC1,$IN01_2,${S4}[0]
umlal $ACC2,$IN01_2,${R0}[0]
#ifdef __AARCH64EB__
rev x8,x8
rev x12,x12
rev x9,x9
rev x13,x13
#endif
add $IN01_1,$IN01_1,$H1
umlal $ACC3,$IN01_0,${R3}[0]
umlal $ACC4,$IN01_0,${R4}[0]
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
umlal $ACC2,$IN01_0,${R2}[0]
and x5,x9,#0x03ffffff
umlal $ACC0,$IN01_0,${R0}[0]
ubfx x6,x8,#26,#26
umlal $ACC1,$IN01_0,${R1}[0]
ubfx x7,x9,#26,#26
add $IN01_3,$IN01_3,$H3
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
umlal $ACC3,$IN01_1,${R2}[0]
extr x8,x12,x8,#52
umlal $ACC4,$IN01_1,${R3}[0]
extr x9,x13,x9,#52
umlal $ACC0,$IN01_1,${S4}[0]
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
umlal $ACC2,$IN01_1,${R1}[0]
fmov $IN01_0,x4
umlal $ACC1,$IN01_1,${R0}[0]
and x8,x8,#0x03ffffff
add $IN01_4,$IN01_4,$H4
and x9,x9,#0x03ffffff
umlal $ACC3,$IN01_3,${R0}[0]
ubfx x10,x12,#14,#26
umlal $ACC0,$IN01_3,${S2}[0]
ubfx x11,x13,#14,#26
umlal $ACC4,$IN01_3,${R1}[0]
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
umlal $ACC1,$IN01_3,${S3}[0]
fmov $IN01_1,x6
umlal $ACC2,$IN01_3,${S4}[0]
add x12,$padbit,x12,lsr#40
umlal $ACC3,$IN01_4,${S4}[0]
add x13,$padbit,x13,lsr#40
umlal $ACC0,$IN01_4,${S1}[0]
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
umlal $ACC4,$IN01_4,${R0}[0]
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
umlal $ACC1,$IN01_4,${S2}[0]
fmov $IN01_2,x8
umlal $ACC2,$IN01_4,${S3}[0]
fmov $IN01_3,x10
fmov $IN01_4,x12
/////////////////////////////////////////////////////////////////
// lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
// and P. Schwabe
//
// [see discussion in poly1305-armv4 module]
ushr $T0.2d,$ACC3,#26
xtn $H3,$ACC3
ushr $T1.2d,$ACC0,#26
and $ACC0,$ACC0,$MASK.2d
add $ACC4,$ACC4,$T0.2d // h3 -> h4
bic $H3,#0xfc,lsl#24 // &=0x03ffffff
add $ACC1,$ACC1,$T1.2d // h0 -> h1
ushr $T0.2d,$ACC4,#26
xtn $H4,$ACC4
ushr $T1.2d,$ACC1,#26
xtn $H1,$ACC1
bic $H4,#0xfc,lsl#24
add $ACC2,$ACC2,$T1.2d // h1 -> h2
add $ACC0,$ACC0,$T0.2d
shl $T0.2d,$T0.2d,#2
shrn $T1.2s,$ACC2,#26
xtn $H2,$ACC2
add $ACC0,$ACC0,$T0.2d // h4 -> h0
bic $H1,#0xfc,lsl#24
add $H3,$H3,$T1.2s // h2 -> h3
bic $H2,#0xfc,lsl#24
shrn $T0.2s,$ACC0,#26
xtn $H0,$ACC0
ushr $T1.2s,$H3,#26
bic $H3,#0xfc,lsl#24
bic $H0,#0xfc,lsl#24
add $H1,$H1,$T0.2s // h0 -> h1
add $H4,$H4,$T1.2s // h3 -> h4
b.hi .Loop_neon
.Lskip_loop:
dup $IN23_2,${IN23_2}[0]
add $IN01_2,$IN01_2,$H2
////////////////////////////////////////////////////////////////
// multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
adds $len,$len,#32
b.ne .Long_tail
dup $IN23_2,${IN01_2}[0]
add $IN23_0,$IN01_0,$H0
add $IN23_3,$IN01_3,$H3
add $IN23_1,$IN01_1,$H1
add $IN23_4,$IN01_4,$H4
.Long_tail:
dup $IN23_0,${IN23_0}[0]
umull2 $ACC0,$IN23_2,${S3}
umull2 $ACC3,$IN23_2,${R1}
umull2 $ACC4,$IN23_2,${R2}
umull2 $ACC2,$IN23_2,${R0}
umull2 $ACC1,$IN23_2,${S4}
dup $IN23_1,${IN23_1}[0]
umlal2 $ACC0,$IN23_0,${R0}
umlal2 $ACC2,$IN23_0,${R2}
umlal2 $ACC3,$IN23_0,${R3}
umlal2 $ACC4,$IN23_0,${R4}
umlal2 $ACC1,$IN23_0,${R1}
dup $IN23_3,${IN23_3}[0]
umlal2 $ACC0,$IN23_1,${S4}
umlal2 $ACC3,$IN23_1,${R2}
umlal2 $ACC2,$IN23_1,${R1}
umlal2 $ACC4,$IN23_1,${R3}
umlal2 $ACC1,$IN23_1,${R0}
dup $IN23_4,${IN23_4}[0]
umlal2 $ACC3,$IN23_3,${R0}
umlal2 $ACC4,$IN23_3,${R1}
umlal2 $ACC0,$IN23_3,${S2}
umlal2 $ACC1,$IN23_3,${S3}
umlal2 $ACC2,$IN23_3,${S4}
umlal2 $ACC3,$IN23_4,${S4}
umlal2 $ACC0,$IN23_4,${S1}
umlal2 $ACC4,$IN23_4,${R0}
umlal2 $ACC1,$IN23_4,${S2}
umlal2 $ACC2,$IN23_4,${S3}
b.eq .Lshort_tail
////////////////////////////////////////////////////////////////
// (hash+inp[0:1])*r^4:r^3 and accumulate
add $IN01_0,$IN01_0,$H0
umlal $ACC3,$IN01_2,${R1}
umlal $ACC0,$IN01_2,${S3}
umlal $ACC4,$IN01_2,${R2}
umlal $ACC1,$IN01_2,${S4}
umlal $ACC2,$IN01_2,${R0}
add $IN01_1,$IN01_1,$H1
umlal $ACC3,$IN01_0,${R3}
umlal $ACC0,$IN01_0,${R0}
umlal $ACC4,$IN01_0,${R4}
umlal $ACC1,$IN01_0,${R1}
umlal $ACC2,$IN01_0,${R2}
add $IN01_3,$IN01_3,$H3
umlal $ACC3,$IN01_1,${R2}
umlal $ACC0,$IN01_1,${S4}
umlal $ACC4,$IN01_1,${R3}
umlal $ACC1,$IN01_1,${R0}
umlal $ACC2,$IN01_1,${R1}
add $IN01_4,$IN01_4,$H4
umlal $ACC3,$IN01_3,${R0}
umlal $ACC0,$IN01_3,${S2}
umlal $ACC4,$IN01_3,${R1}
umlal $ACC1,$IN01_3,${S3}
umlal $ACC2,$IN01_3,${S4}
umlal $ACC3,$IN01_4,${S4}
umlal $ACC0,$IN01_4,${S1}
umlal $ACC4,$IN01_4,${R0}
umlal $ACC1,$IN01_4,${S2}
umlal $ACC2,$IN01_4,${S3}
.Lshort_tail:
////////////////////////////////////////////////////////////////
// horizontal add
addp $ACC3,$ACC3,$ACC3
ldp d8,d9,[sp,#16] // meet ABI requirements
addp $ACC0,$ACC0,$ACC0
ldp d10,d11,[sp,#32]
addp $ACC4,$ACC4,$ACC4
ldp d12,d13,[sp,#48]
addp $ACC1,$ACC1,$ACC1
ldp d14,d15,[sp,#64]
addp $ACC2,$ACC2,$ACC2
////////////////////////////////////////////////////////////////
// lazy reduction, but without narrowing
ushr $T0.2d,$ACC3,#26
and $ACC3,$ACC3,$MASK.2d
ushr $T1.2d,$ACC0,#26
and $ACC0,$ACC0,$MASK.2d
add $ACC4,$ACC4,$T0.2d // h3 -> h4
add $ACC1,$ACC1,$T1.2d // h0 -> h1
ushr $T0.2d,$ACC4,#26
and $ACC4,$ACC4,$MASK.2d
ushr $T1.2d,$ACC1,#26
and $ACC1,$ACC1,$MASK.2d
add $ACC2,$ACC2,$T1.2d // h1 -> h2
add $ACC0,$ACC0,$T0.2d
shl $T0.2d,$T0.2d,#2
ushr $T1.2d,$ACC2,#26
and $ACC2,$ACC2,$MASK.2d
add $ACC0,$ACC0,$T0.2d // h4 -> h0
add $ACC3,$ACC3,$T1.2d // h2 -> h3
ushr $T0.2d,$ACC0,#26
and $ACC0,$ACC0,$MASK.2d
ushr $T1.2d,$ACC3,#26
and $ACC3,$ACC3,$MASK.2d
add $ACC1,$ACC1,$T0.2d // h0 -> h1
add $ACC4,$ACC4,$T1.2d // h3 -> h4
////////////////////////////////////////////////////////////////
// write the result, can be partially reduced
st4 {$ACC0,$ACC1,$ACC2,$ACC3}[0],[$ctx],#16
st1 {$ACC4}[0],[$ctx]
.Lno_data_neon:
ldr x29,[sp],#80
ret
.size poly1305_blocks_neon,.-poly1305_blocks_neon
.type poly1305_emit_neon,%function
.align 5
poly1305_emit_neon:
ldr $is_base2_26,[$ctx,#24]
cbz $is_base2_26,poly1305_emit
ldp w10,w11,[$ctx] // load hash value base 2^26
ldp w12,w13,[$ctx,#8]
ldr w14,[$ctx,#16]
add $h0,x10,x11,lsl#26 // base 2^26 -> base 2^64
lsr $h1,x12,#12
adds $h0,$h0,x12,lsl#52
add $h1,$h1,x13,lsl#14
adc $h1,$h1,xzr
lsr $h2,x14,#24
adds $h1,$h1,x14,lsl#40
adc $h2,$h2,xzr // can be partially reduced...
ldp $t0,$t1,[$nonce] // load nonce
and $d0,$h2,#-4 // ... so reduce
add $d0,$d0,$h2,lsr#2
and $h2,$h2,#3
adds $h0,$h0,$d0
adcs $h1,$h1,xzr
adc $h2,$h2,xzr
adds $d0,$h0,#5 // compare to modulus
adcs $d1,$h1,xzr
adc $d2,$h2,xzr
tst $d2,#-4 // see if it's carried/borrowed
csel $h0,$h0,$d0,eq
csel $h1,$h1,$d1,eq
#ifdef __AARCH64EB__
ror $t0,$t0,#32 // flip nonce words
ror $t1,$t1,#32
#endif
adds $h0,$h0,$t0 // accumulate nonce
adc $h1,$h1,$t1
#ifdef __AARCH64EB__
rev $h0,$h0 // flip output bytes
rev $h1,$h1
#endif
stp $h0,$h1,[$mac] // write result
ret
.size poly1305_emit_neon,.-poly1305_emit_neon
#endif
.align 5
.Lzeros:
.long 0,0,0,0,0,0,0,0
#ifndef __KERNEL__
.LOPENSSL_armcap_P:
#ifdef __ILP32__
.long OPENSSL_armcap_P-.
#else
.quad OPENSSL_armcap_P-.
#endif
#endif
.align 2
___
open SELF,$0;
while(<SELF>) {
next if (/^#!/);
last if (!s/^#/\/\// and !/^$/);
print;
}
close SELF;
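# The loop above copies this script's own leading comment block (the
# license and attribution header) into the generated assembly, rewriting
# "#" comments as "//" and stopping at the first non-comment line.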
foreach (split("\n",$code)) {
s/\b(shrn\s+v[0-9]+)\.[24]d/$1.2s/ or
s/\b(fmov\s+)v([0-9]+)[^,]*,\s*x([0-9]+)/$1d$2,x$3/ or
(m/\bdup\b/ and (s/\.[24]s/.2d/g or 1)) or
(m/\b(eor|and)/ and (s/\.[248][sdh]/.16b/g or 1)) or
(m/\bum(ul|la)l\b/ and (s/\.4s/.2s/g or 1)) or
(m/\bum(ul|la)l2\b/ and (s/\.2s/.4s/g or 1)) or
(m/\bst[1-4]\s+{[^}]+}\[/ and (s/\.[24]d/.s/g or 1));
s/\.[124]([sd])\[/.$1\[/;
print $_,"\n";
}
close STDOUT;

View File

@ -0,0 +1,205 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*
* This is based in part on Andrew Moon's poly1305-donna, which is in the
* public domain.
*/
struct poly1305_internal {
u32 h[5];
u32 r[5];
u32 s[4];
};
static void poly1305_init_generic(void *ctx, const u8 key[16])
{
struct poly1305_internal *st = (struct poly1305_internal *)ctx;
/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
st->r[0] = (get_unaligned_le32(&key[0])) & 0x3ffffff;
st->r[1] = (get_unaligned_le32(&key[3]) >> 2) & 0x3ffff03;
st->r[2] = (get_unaligned_le32(&key[6]) >> 4) & 0x3ffc0ff;
st->r[3] = (get_unaligned_le32(&key[9]) >> 6) & 0x3f03fff;
st->r[4] = (get_unaligned_le32(&key[12]) >> 8) & 0x00fffff;
/* s = 5*r */
st->s[0] = st->r[1] * 5;
st->s[1] = st->r[2] * 5;
st->s[2] = st->r[3] * 5;
st->s[3] = st->r[4] * 5;
/* h = 0 */
st->h[0] = 0;
st->h[1] = 0;
st->h[2] = 0;
st->h[3] = 0;
st->h[4] = 0;
}
static void poly1305_blocks_generic(void *ctx, const u8 *input, size_t len,
const u32 padbit)
{
struct poly1305_internal *st = (struct poly1305_internal *)ctx;
const u32 hibit = padbit << 24;
u32 r0, r1, r2, r3, r4;
u32 s1, s2, s3, s4;
u32 h0, h1, h2, h3, h4;
u64 d0, d1, d2, d3, d4;
u32 c;
r0 = st->r[0];
r1 = st->r[1];
r2 = st->r[2];
r3 = st->r[3];
r4 = st->r[4];
s1 = st->s[0];
s2 = st->s[1];
s3 = st->s[2];
s4 = st->s[3];
h0 = st->h[0];
h1 = st->h[1];
h2 = st->h[2];
h3 = st->h[3];
h4 = st->h[4];
while (len >= POLY1305_BLOCK_SIZE) {
/* h += m[i] */
h0 += (get_unaligned_le32(&input[0])) & 0x3ffffff;
h1 += (get_unaligned_le32(&input[3]) >> 2) & 0x3ffffff;
h2 += (get_unaligned_le32(&input[6]) >> 4) & 0x3ffffff;
h3 += (get_unaligned_le32(&input[9]) >> 6) & 0x3ffffff;
h4 += (get_unaligned_le32(&input[12]) >> 8) | hibit;
/* h *= r */
d0 = ((u64)h0 * r0) + ((u64)h1 * s4) +
((u64)h2 * s3) + ((u64)h3 * s2) +
((u64)h4 * s1);
d1 = ((u64)h0 * r1) + ((u64)h1 * r0) +
((u64)h2 * s4) + ((u64)h3 * s3) +
((u64)h4 * s2);
d2 = ((u64)h0 * r2) + ((u64)h1 * r1) +
((u64)h2 * r0) + ((u64)h3 * s4) +
((u64)h4 * s3);
d3 = ((u64)h0 * r3) + ((u64)h1 * r2) +
((u64)h2 * r1) + ((u64)h3 * r0) +
((u64)h4 * s4);
d4 = ((u64)h0 * r4) + ((u64)h1 * r3) +
((u64)h2 * r2) + ((u64)h3 * r1) +
((u64)h4 * r0);
/* (partial) h %= p */
c = (u32)(d0 >> 26);
h0 = (u32)d0 & 0x3ffffff;
d1 += c;
c = (u32)(d1 >> 26);
h1 = (u32)d1 & 0x3ffffff;
d2 += c;
c = (u32)(d2 >> 26);
h2 = (u32)d2 & 0x3ffffff;
d3 += c;
c = (u32)(d3 >> 26);
h3 = (u32)d3 & 0x3ffffff;
d4 += c;
c = (u32)(d4 >> 26);
h4 = (u32)d4 & 0x3ffffff;
h0 += c * 5;
c = (h0 >> 26);
h0 = h0 & 0x3ffffff;
h1 += c;
input += POLY1305_BLOCK_SIZE;
len -= POLY1305_BLOCK_SIZE;
}
st->h[0] = h0;
st->h[1] = h1;
st->h[2] = h2;
st->h[3] = h3;
st->h[4] = h4;
}
static void poly1305_emit_generic(void *ctx, u8 mac[16], const u32 nonce[4])
{
struct poly1305_internal *st = (struct poly1305_internal *)ctx;
u32 h0, h1, h2, h3, h4, c;
u32 g0, g1, g2, g3, g4;
u64 f;
u32 mask;
/* fully carry h */
h0 = st->h[0];
h1 = st->h[1];
h2 = st->h[2];
h3 = st->h[3];
h4 = st->h[4];
c = h1 >> 26;
h1 = h1 & 0x3ffffff;
h2 += c;
c = h2 >> 26;
h2 = h2 & 0x3ffffff;
h3 += c;
c = h3 >> 26;
h3 = h3 & 0x3ffffff;
h4 += c;
c = h4 >> 26;
h4 = h4 & 0x3ffffff;
h0 += c * 5;
c = h0 >> 26;
h0 = h0 & 0x3ffffff;
h1 += c;
/* compute h + -p */
g0 = h0 + 5;
c = g0 >> 26;
g0 &= 0x3ffffff;
g1 = h1 + c;
c = g1 >> 26;
g1 &= 0x3ffffff;
g2 = h2 + c;
c = g2 >> 26;
g2 &= 0x3ffffff;
g3 = h3 + c;
c = g3 >> 26;
g3 &= 0x3ffffff;
g4 = h4 + c - (1UL << 26);
/* select h if h < p, or h + -p if h >= p */
mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
g0 &= mask;
g1 &= mask;
g2 &= mask;
g3 &= mask;
g4 &= mask;
mask = ~mask;
h0 = (h0 & mask) | g0;
h1 = (h1 & mask) | g1;
h2 = (h2 & mask) | g2;
h3 = (h3 & mask) | g3;
h4 = (h4 & mask) | g4;
/* h = h % (2^128) */
h0 = ((h0) | (h1 << 26)) & 0xffffffff;
h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
/* mac = (h + nonce) % (2^128) */
f = (u64)h0 + nonce[0];
h0 = (u32)f;
f = (u64)h1 + nonce[1] + (f >> 32);
h1 = (u32)f;
f = (u64)h2 + nonce[2] + (f >> 32);
h2 = (u32)f;
f = (u64)h3 + nonce[3] + (f >> 32);
h3 = (u32)f;
put_unaligned_le32(h0, &mac[0]);
put_unaligned_le32(h1, &mac[4]);
put_unaligned_le32(h2, &mac[8]);
put_unaligned_le32(h3, &mac[12]);
}
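The interleaved masks in poly1305_init_generic() above implement the standard
Poly1305 clamp, r &= 0x0ffffffc0ffffffc0ffffffc0fffffff, directly in the
26-bit radix. A standalone sketch (not from the commit; the le32() helper is
illustrative) that performs the same clamping and prints the limbs:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t le32(const uint8_t *p)
{
	return (uint32_t)p[0] | (uint32_t)p[1] << 8 |
	    (uint32_t)p[2] << 16 | (uint32_t)p[3] << 24;
}

int main(void)
{
	uint8_t key[16];
	uint32_t r[5];
	int i;

	memset(key, 0xff, sizeof(key));	/* worst case before clamping */
	r[0] = (le32(&key[0])) & 0x3ffffff;
	r[1] = (le32(&key[3]) >> 2) & 0x3ffff03;
	r[2] = (le32(&key[6]) >> 4) & 0x3ffc0ff;
	r[3] = (le32(&key[9]) >> 6) & 0x3f03fff;
	r[4] = (le32(&key[12]) >> 8) & 0x00fffff;
	for (i = 0; i < 5; i++)
		printf("r[%d] = 0x%07x\n", i, (unsigned)r[i]);
	return 0;
}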

View File

@ -0,0 +1,182 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*
* This is based in part on Andrew Moon's poly1305-donna, which is in the
* public domain.
*/
typedef __uint128_t u128;
struct poly1305_internal {
u64 r[3];
u64 h[3];
u64 s[2];
};
static void poly1305_init_generic(void *ctx, const u8 key[16])
{
struct poly1305_internal *st = (struct poly1305_internal *)ctx;
u64 t0, t1;
/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
t0 = get_unaligned_le64(&key[0]);
t1 = get_unaligned_le64(&key[8]);
st->r[0] = t0 & 0xffc0fffffffULL;
st->r[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffffULL;
st->r[2] = ((t1 >> 24)) & 0x00ffffffc0fULL;
/* s = 20*r */
st->s[0] = st->r[1] * 20;
st->s[1] = st->r[2] * 20;
/* h = 0 */
st->h[0] = 0;
st->h[1] = 0;
st->h[2] = 0;
}
static void poly1305_blocks_generic(void *ctx, const u8 *input, size_t len,
const u32 padbit)
{
struct poly1305_internal *st = (struct poly1305_internal *)ctx;
const u64 hibit = ((u64)padbit) << 40;
u64 r0, r1, r2;
u64 s1, s2;
u64 h0, h1, h2;
u64 c;
u128 d0, d1, d2, d;
r0 = st->r[0];
r1 = st->r[1];
r2 = st->r[2];
h0 = st->h[0];
h1 = st->h[1];
h2 = st->h[2];
s1 = st->s[0];
s2 = st->s[1];
while (len >= POLY1305_BLOCK_SIZE) {
u64 t0, t1;
/* h += m[i] */
t0 = get_unaligned_le64(&input[0]);
t1 = get_unaligned_le64(&input[8]);
h0 += t0 & 0xfffffffffffULL;
h1 += ((t0 >> 44) | (t1 << 20)) & 0xfffffffffffULL;
h2 += (((t1 >> 24)) & 0x3ffffffffffULL) | hibit;
/* h *= r */
d0 = (u128)h0 * r0;
d = (u128)h1 * s2;
d0 += d;
d = (u128)h2 * s1;
d0 += d;
d1 = (u128)h0 * r1;
d = (u128)h1 * r0;
d1 += d;
d = (u128)h2 * s2;
d1 += d;
d2 = (u128)h0 * r2;
d = (u128)h1 * r1;
d2 += d;
d = (u128)h2 * r0;
d2 += d;
/* (partial) h %= p */
c = (u64)(d0 >> 44);
h0 = (u64)d0 & 0xfffffffffffULL;
d1 += c;
c = (u64)(d1 >> 44);
h1 = (u64)d1 & 0xfffffffffffULL;
d2 += c;
c = (u64)(d2 >> 42);
h2 = (u64)d2 & 0x3ffffffffffULL;
h0 += c * 5;
c = h0 >> 44;
h0 = h0 & 0xfffffffffffULL;
h1 += c;
input += POLY1305_BLOCK_SIZE;
len -= POLY1305_BLOCK_SIZE;
}
st->h[0] = h0;
st->h[1] = h1;
st->h[2] = h2;
}
static void poly1305_emit_generic(void *ctx, u8 mac[16], const u32 nonce[4])
{
struct poly1305_internal *st = (struct poly1305_internal *)ctx;
u64 h0, h1, h2, c;
u64 g0, g1, g2;
u64 t0, t1;
/* fully carry h */
h0 = st->h[0];
h1 = st->h[1];
h2 = st->h[2];
c = h1 >> 44;
h1 &= 0xfffffffffffULL;
h2 += c;
c = h2 >> 42;
h2 &= 0x3ffffffffffULL;
h0 += c * 5;
c = h0 >> 44;
h0 &= 0xfffffffffffULL;
h1 += c;
c = h1 >> 44;
h1 &= 0xfffffffffffULL;
h2 += c;
c = h2 >> 42;
h2 &= 0x3ffffffffffULL;
h0 += c * 5;
c = h0 >> 44;
h0 &= 0xfffffffffffULL;
h1 += c;
/* compute h + -p */
g0 = h0 + 5;
c = g0 >> 44;
g0 &= 0xfffffffffffULL;
g1 = h1 + c;
c = g1 >> 44;
g1 &= 0xfffffffffffULL;
g2 = h2 + c - (1ULL << 42);
/* select h if h < p, or h + -p if h >= p */
c = (g2 >> ((sizeof(u64) * 8) - 1)) - 1;
g0 &= c;
g1 &= c;
g2 &= c;
c = ~c;
h0 = (h0 & c) | g0;
h1 = (h1 & c) | g1;
h2 = (h2 & c) | g2;
/* h = (h + nonce) */
t0 = ((u64)nonce[1] << 32) | nonce[0];
t1 = ((u64)nonce[3] << 32) | nonce[2];
h0 += t0 & 0xfffffffffffULL;
c = h0 >> 44;
h0 &= 0xfffffffffffULL;
h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffffULL) + c;
c = h1 >> 44;
h1 &= 0xfffffffffffULL;
h2 += (((t1 >> 24)) & 0x3ffffffffffULL) + c;
h2 &= 0x3ffffffffffULL;
/* mac = h % (2^128) */
h0 = h0 | (h1 << 44);
h1 = (h1 >> 20) | (h2 << 24);
put_unaligned_le64(h0, &mac[0]);
put_unaligned_le64(h1, &mac[8]);
}
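Why this 64-bit version precomputes s = 20*r while the 32-bit version uses
s = 5*r: an informal derivation, reasoning from the code above rather than
taken from the source. Products that overflow 2^130 are folded back using

	2^{130} \equiv 5 \pmod{2^{130} - 5}

With limb weights 2^0, 2^{44} and 2^{88}, the wrapped partial products
overshoot 2^{130} by two extra bits:

	h_1 r_2,\ h_2 r_1:\quad 2^{44+88} = 2^{132} = 2^2 \cdot 2^{130} \equiv 4 \cdot 5 = 20
	h_2 r_2:\quad 2^{88+88} = 2^{176} = 2^{46} \cdot 2^{130} \equiv 5 \cdot 2^{46} = 20 \cdot 2^{44}

In the 32-bit code the limb weights are exact multiples of 2^26, so wrapped
products land exactly on a limb boundary and the factor is plain 5; here the
44/44/42-bit split leaves a 2-bit overshoot, hence the factor 5 * 4 = 20.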

View File

@ -0,0 +1,37 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
asmlinkage void poly1305_init_mips(void *ctx, const u8 key[16]);
asmlinkage void poly1305_blocks_mips(void *ctx, const u8 *inp, const size_t len,
const u32 padbit);
asmlinkage void poly1305_emit_mips(void *ctx, u8 mac[16], const u32 nonce[4]);
static bool *const poly1305_nobs[] __initconst = { };
static void __init poly1305_fpu_init(void)
{
}
static inline bool poly1305_init_arch(void *ctx,
const u8 key[POLY1305_KEY_SIZE])
{
poly1305_init_mips(ctx, key);
return true;
}
static inline bool poly1305_blocks_arch(void *ctx, const u8 *inp,
size_t len, const u32 padbit,
simd_context_t *simd_context)
{
poly1305_blocks_mips(ctx, inp, len, padbit);
return true;
}
static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
const u32 nonce[4],
simd_context_t *simd_context)
{
poly1305_emit_mips(ctx, mac, nonce);
return true;
}

View File

@ -0,0 +1,407 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com> All Rights Reserved.
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define MSB 0
#define LSB 3
#else
#define MSB 3
#define LSB 0
#endif
#define POLY1305_BLOCK_SIZE 16
.text
#define H0 $t0
#define H1 $t1
#define H2 $t2
#define H3 $t3
#define H4 $t4
#define R0 $t5
#define R1 $t6
#define R2 $t7
#define R3 $t8
#define O0 $s0
#define O1 $s4
#define O2 $v1
#define O3 $t9
#define O4 $s5
#define S1 $s1
#define S2 $s2
#define S3 $s3
#define SC $at
#define CA $v0
/* Input arguments */
#define poly $a0
#define src $a1
#define srclen $a2
#define hibit $a3
/* Location in the opaque buffer
* R[0..3], CA, H[0..4]
*/
#define PTR_POLY1305_R(n) ( 0 + (n*4)) ## ($a0)
#define PTR_POLY1305_CA (16 ) ## ($a0)
#define PTR_POLY1305_H(n) (20 + (n*4)) ## ($a0)
#define POLY1305_BLOCK_SIZE 16
#define POLY1305_STACK_SIZE 32
.set noat
.align 4
.globl poly1305_blocks_mips
.ent poly1305_blocks_mips
poly1305_blocks_mips:
.frame $sp, POLY1305_STACK_SIZE, $ra
/* srclen &= 0xFFFFFFF0 */
ins srclen, $zero, 0, 4
addiu $sp, -(POLY1305_STACK_SIZE)
/* check srclen >= 16 bytes */
beqz srclen, .Lpoly1305_blocks_mips_end
/* Calculate last round based on src address pointer.
* last round src ptr (srclen) = src + (srclen & 0xFFFFFFF0)
*/
addu srclen, src
lw R0, PTR_POLY1305_R(0)
lw R1, PTR_POLY1305_R(1)
lw R2, PTR_POLY1305_R(2)
lw R3, PTR_POLY1305_R(3)
/* store the used save registers. */
sw $s0, 0($sp)
sw $s1, 4($sp)
sw $s2, 8($sp)
sw $s3, 12($sp)
sw $s4, 16($sp)
sw $s5, 20($sp)
/* load Hx and Carry */
lw CA, PTR_POLY1305_CA
lw H0, PTR_POLY1305_H(0)
lw H1, PTR_POLY1305_H(1)
lw H2, PTR_POLY1305_H(2)
lw H3, PTR_POLY1305_H(3)
lw H4, PTR_POLY1305_H(4)
/* Sx = Rx + (Rx >> 2) */
srl S1, R1, 2
srl S2, R2, 2
srl S3, R3, 2
addu S1, R1
addu S2, R2
addu S3, R3
addiu SC, $zero, 1
.Lpoly1305_loop:
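/* The lwl/lwr pairs below implement unaligned 32-bit loads
 * (these instructions predate and were removed in MIPS32R6). */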
lwl O0, 0+MSB(src)
lwl O1, 4+MSB(src)
lwl O2, 8+MSB(src)
lwl O3,12+MSB(src)
lwr O0, 0+LSB(src)
lwr O1, 4+LSB(src)
lwr O2, 8+LSB(src)
lwr O3,12+LSB(src)
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
wsbh O0
wsbh O1
wsbh O2
wsbh O3
rotr O0, 16
rotr O1, 16
rotr O2, 16
rotr O3, 16
#endif
/* h0 = (u32)(d0 = (u64)h0 + inp[0] + c 'Carry_previous cycle'); */
addu H0, CA
sltu CA, H0, CA
addu O0, H0
sltu H0, O0, H0
addu CA, H0
/* h1 = (u32)(d1 = (u64)h1 + (d0 >> 32) + inp[4]); */
addu H1, CA
sltu CA, H1, CA
addu O1, H1
sltu H1, O1, H1
addu CA, H1
/* h2 = (u32)(d2 = (u64)h2 + (d1 >> 32) + inp[8]); */
addu H2, CA
sltu CA, H2, CA
addu O2, H2
sltu H2, O2, H2
addu CA, H2
/* h3 = (u32)(d3 = (u64)h3 + (d2 >> 32) + inp[12]); */
addu H3, CA
sltu CA, H3, CA
addu O3, H3
sltu H3, O3, H3
addu CA, H3
/* h4 += (u32)(d3 >> 32) + padbit; */
addu H4, hibit
addu O4, H4, CA
/* D0 */
multu O0, R0
maddu O1, S3
maddu O2, S2
maddu O3, S1
mfhi CA
mflo H0
/* D1 */
multu O0, R1
maddu O1, R0
maddu O2, S3
maddu O3, S2
maddu O4, S1
maddu CA, SC
mfhi CA
mflo H1
/* D2 */
multu O0, R2
maddu O1, R1
maddu O2, R0
maddu O3, S3
maddu O4, S2
maddu CA, SC
mfhi CA
mflo H2
/* D4 */
mul H4, O4, R0
/* D3 */
multu O0, R3
maddu O1, R2
maddu O2, R1
maddu O3, R0
maddu O4, S3
maddu CA, SC
mfhi CA
mflo H3
addiu src, POLY1305_BLOCK_SIZE
/* h4 += (u32)(d3 >> 32); */
addu O4, H4, CA
/* h4 &= 3 */
andi H4, O4, 3
/* c = (h4 >> 2) + (h4 & ~3U); */
srl CA, O4, 2
ins O4, $zero, 0, 2
addu CA, O4
/* able to do a 16 byte block. */
bne src, srclen, .Lpoly1305_loop
/* restore the used save registers. */
lw $s0, 0($sp)
lw $s1, 4($sp)
lw $s2, 8($sp)
lw $s3, 12($sp)
lw $s4, 16($sp)
lw $s5, 20($sp)
/* store Hx and Carry */
sw CA, PTR_POLY1305_CA
sw H0, PTR_POLY1305_H(0)
sw H1, PTR_POLY1305_H(1)
sw H2, PTR_POLY1305_H(2)
sw H3, PTR_POLY1305_H(3)
sw H4, PTR_POLY1305_H(4)
.Lpoly1305_blocks_mips_end:
addiu $sp, POLY1305_STACK_SIZE
/* Jump Back */
jr $ra
.end poly1305_blocks_mips
.set at
/* Input arguments CTX=$a0, MAC=$a1, NONCE=$a2 */
#define MAC $a1
#define NONCE $a2
#define G0 $t5
#define G1 $t6
#define G2 $t7
#define G3 $t8
#define G4 $t9
.set noat
.align 4
.globl poly1305_emit_mips
.ent poly1305_emit_mips
poly1305_emit_mips:
/* load Hx and Carry */
lw CA, PTR_POLY1305_CA
lw H0, PTR_POLY1305_H(0)
lw H1, PTR_POLY1305_H(1)
lw H2, PTR_POLY1305_H(2)
lw H3, PTR_POLY1305_H(3)
lw H4, PTR_POLY1305_H(4)
/* Add left over carry */
addu H0, CA
sltu CA, H0, CA
addu H1, CA
sltu CA, H1, CA
addu H2, CA
sltu CA, H2, CA
addu H3, CA
sltu CA, H3, CA
addu H4, CA
/* compare to modulus by computing h + -p */
addiu G0, H0, 5
sltu CA, G0, H0
addu G1, H1, CA
sltu CA, G1, H1
addu G2, H2, CA
sltu CA, G2, H2
addu G3, H3, CA
sltu CA, G3, H3
addu G4, H4, CA
srl SC, G4, 2
/* if there was carry into 131st bit, h3:h0 = g3:g0 */
movn H0, G0, SC
movn H1, G1, SC
movn H2, G2, SC
movn H3, G3, SC
lwl G0, 0+MSB(NONCE)
lwl G1, 4+MSB(NONCE)
lwl G2, 8+MSB(NONCE)
lwl G3,12+MSB(NONCE)
lwr G0, 0+LSB(NONCE)
lwr G1, 4+LSB(NONCE)
lwr G2, 8+LSB(NONCE)
lwr G3,12+LSB(NONCE)
/* mac = (h + nonce) % (2^128) */
addu H0, G0
sltu CA, H0, G0
/* H1 */
addu H1, CA
sltu CA, H1, CA
addu H1, G1
sltu G1, H1, G1
addu CA, G1
/* H2 */
addu H2, CA
sltu CA, H2, CA
addu H2, G2
sltu G2, H2, G2
addu CA, G2
/* H3 */
addu H3, CA
addu H3, G3
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
wsbh H0
wsbh H1
wsbh H2
wsbh H3
rotr H0, 16
rotr H1, 16
rotr H2, 16
rotr H3, 16
#endif
/* store MAC */
swl H0, 0+MSB(MAC)
swl H1, 4+MSB(MAC)
swl H2, 8+MSB(MAC)
swl H3,12+MSB(MAC)
swr H0, 0+LSB(MAC)
swr H1, 4+LSB(MAC)
swr H2, 8+LSB(MAC)
swr H3,12+LSB(MAC)
jr $ra
.end poly1305_emit_mips
#define PR0 $t0
#define PR1 $t1
#define PR2 $t2
#define PR3 $t3
#define PT0 $t4
/* Input arguments CTX=$a0, KEY=$a1 */
.align 4
.globl poly1305_init_mips
.ent poly1305_init_mips
poly1305_init_mips:
lwl PR0, 0+MSB($a1)
lwl PR1, 4+MSB($a1)
lwl PR2, 8+MSB($a1)
lwl PR3,12+MSB($a1)
lwr PR0, 0+LSB($a1)
lwr PR1, 4+LSB($a1)
lwr PR2, 8+LSB($a1)
lwr PR3,12+LSB($a1)
/* store Hx and Carry */
sw $zero, PTR_POLY1305_CA
sw $zero, PTR_POLY1305_H(0)
sw $zero, PTR_POLY1305_H(1)
sw $zero, PTR_POLY1305_H(2)
sw $zero, PTR_POLY1305_H(3)
sw $zero, PTR_POLY1305_H(4)
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
wsbh PR0
wsbh PR1
wsbh PR2
wsbh PR3
rotr PR0, 16
rotr PR1, 16
rotr PR2, 16
rotr PR3, 16
#endif
lui PT0, 0x0FFF
ori PT0, 0xFFFC
/* AND 0x0fffffff; */
ext PR0, PR0, 0, (32-4)
/* AND 0x0ffffffc; */
and PR1, PT0
and PR2, PT0
and PR3, PT0
/* store Rx */
sw PR0, PTR_POLY1305_R(0)
sw PR1, PTR_POLY1305_R(1)
sw PR2, PTR_POLY1305_R(2)
sw PR3, PTR_POLY1305_R(3)
/* Jump Back */
jr $ra
.end poly1305_init_mips

View File

@ -0,0 +1,467 @@
#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
#
# This code is taken from the OpenSSL project but the author, Andy Polyakov,
# has relicensed it under the licenses specified in the SPDX header above.
# The original headers, including the original license headers, are
# included below for completeness.
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# Poly1305 hash for MIPS64.
#
# May 2016
#
# Numbers are cycles per processed byte with poly1305_blocks alone.
#
# IALU/gcc
# R1x000 5.64/+120% (big-endian)
# Octeon II 3.80/+280% (little-endian)
######################################################################
# There are a number of MIPS ABIs in use, of which O32 and N32/64 are the
# most widely used. Then there is a new contender: NUBI. It appears that if
# one picks the latter, it's possible to arrange code in an ABI-neutral
# manner. Therefore let's stick to the NUBI register layout:
#
($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
#
# The return value is placed in $a0. Following coding rules facilitate
# interoperability:
#
# - never ever touch $tp, "thread pointer", former $gp [o32 can be
# excluded from the rule, because it's specified volatile];
# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
# old code];
# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
#
# For reference here is register layout for N32/64 MIPS ABIs:
#
# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
#
# <appro@openssl.org>
#
######################################################################
$flavour = shift || "64"; # supported flavours are o32,n32,64,nubi32,nubi64
die "MIPS64 only" unless ($flavour =~ /64|n32/i);
$v0 = ($flavour =~ /nubi/i) ? $a0 : $t0;
$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x0003f000" : "0x00030000";
($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1);
$code.=<<___;
#if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\
defined(_MIPS_ARCH_MIPS64R6)) \\
&& !defined(_MIPS_ARCH_MIPS64R2)
# define _MIPS_ARCH_MIPS64R2
#endif
#if defined(_MIPS_ARCH_MIPS64R6)
# define dmultu(rs,rt)
# define mflo(rd,rs,rt) dmulu rd,rs,rt
# define mfhi(rd,rs,rt) dmuhu rd,rs,rt
#else
# define dmultu(rs,rt) dmultu rs,rt
# define mflo(rd,rs,rt) mflo rd
# define mfhi(rd,rs,rt) mfhi rd
#endif
#ifdef __KERNEL__
# define poly1305_init poly1305_init_mips
# define poly1305_blocks poly1305_blocks_mips
# define poly1305_emit poly1305_emit_mips
#endif
#if defined(__MIPSEB__) && !defined(MIPSEB)
# define MIPSEB
#endif
#ifdef MIPSEB
# define MSB 0
# define LSB 7
#else
# define MSB 7
# define LSB 0
#endif
.text
.set noat
.set noreorder
.align 5
.globl poly1305_init
.ent poly1305_init
poly1305_init:
.frame $sp,0,$ra
.set reorder
sd $zero,0($ctx)
sd $zero,8($ctx)
sd $zero,16($ctx)
beqz $inp,.Lno_key
#if defined(_MIPS_ARCH_MIPS64R6)
ld $in0,0($inp)
ld $in1,8($inp)
#else
ldl $in0,0+MSB($inp)
ldl $in1,8+MSB($inp)
ldr $in0,0+LSB($inp)
ldr $in1,8+LSB($inp)
#endif
#ifdef MIPSEB
# if defined(_MIPS_ARCH_MIPS64R2)
dsbh $in0,$in0 # byte swap
dsbh $in1,$in1
dshd $in0,$in0
dshd $in1,$in1
# else
ori $tmp0,$zero,0xFF
dsll $tmp2,$tmp0,32
or $tmp0,$tmp2 # 0x000000FF000000FF
and $tmp1,$in0,$tmp0 # byte swap
and $tmp3,$in1,$tmp0
dsrl $tmp2,$in0,24
dsrl $tmp4,$in1,24
dsll $tmp1,24
dsll $tmp3,24
and $tmp2,$tmp0
and $tmp4,$tmp0
dsll $tmp0,8 # 0x0000FF000000FF00
or $tmp1,$tmp2
or $tmp3,$tmp4
and $tmp2,$in0,$tmp0
and $tmp4,$in1,$tmp0
dsrl $in0,8
dsrl $in1,8
dsll $tmp2,8
dsll $tmp4,8
and $in0,$tmp0
and $in1,$tmp0
or $tmp1,$tmp2
or $tmp3,$tmp4
or $in0,$tmp1
or $in1,$tmp3
dsrl $tmp1,$in0,32
dsrl $tmp3,$in1,32
dsll $in0,32
dsll $in1,32
or $in0,$tmp1
or $in1,$tmp3
# endif
#endif
li $tmp0,1
dsll $tmp0,32
daddiu $tmp0,-63
dsll $tmp0,28
daddiu $tmp0,-1 # 0ffffffc0fffffff
and $in0,$tmp0
daddiu $tmp0,-3 # 0ffffffc0ffffffc
and $in1,$tmp0
sd $in0,24($ctx)
dsrl $tmp0,$in1,2
sd $in1,32($ctx)
daddu $tmp0,$in1 # s1 = r1 + (r1 >> 2)
sd $tmp0,40($ctx)
.Lno_key:
li $v0,0 # return 0
jr $ra
.end poly1305_init
___
{
my ($h0,$h1,$h2,$r0,$r1,$s1,$d0,$d1,$d2) =
($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2);
$code.=<<___;
.align 5
.globl poly1305_blocks
.ent poly1305_blocks
poly1305_blocks:
.set noreorder
dsrl $len,4 # number of complete blocks
bnez $len,poly1305_blocks_internal
nop
jr $ra
nop
.end poly1305_blocks
.align 5
.ent poly1305_blocks_internal
poly1305_blocks_internal:
.frame $sp,6*8,$ra
.mask $SAVED_REGS_MASK,-8
.set noreorder
dsubu $sp,6*8
sd $s5,40($sp)
sd $s4,32($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
sd $s3,24($sp)
sd $s2,16($sp)
sd $s1,8($sp)
sd $s0,0($sp)
___
$code.=<<___;
.set reorder
ld $h0,0($ctx) # load hash value
ld $h1,8($ctx)
ld $h2,16($ctx)
ld $r0,24($ctx) # load key
ld $r1,32($ctx)
ld $s1,40($ctx)
.Loop:
#if defined(_MIPS_ARCH_MIPS64R6)
ld $in0,0($inp) # load input
ld $in1,8($inp)
#else
ldl $in0,0+MSB($inp) # load input
ldl $in1,8+MSB($inp)
ldr $in0,0+LSB($inp)
ldr $in1,8+LSB($inp)
#endif
daddiu $len,-1
daddiu $inp,16
#ifdef MIPSEB
# if defined(_MIPS_ARCH_MIPS64R2)
dsbh $in0,$in0 # byte swap
dsbh $in1,$in1
dshd $in0,$in0
dshd $in1,$in1
# else
ori $tmp0,$zero,0xFF
dsll $tmp2,$tmp0,32
or $tmp0,$tmp2 # 0x000000FF000000FF
and $tmp1,$in0,$tmp0 # byte swap
and $tmp3,$in1,$tmp0
dsrl $tmp2,$in0,24
dsrl $tmp4,$in1,24
dsll $tmp1,24
dsll $tmp3,24
and $tmp2,$tmp0
and $tmp4,$tmp0
dsll $tmp0,8 # 0x0000FF000000FF00
or $tmp1,$tmp2
or $tmp3,$tmp4
and $tmp2,$in0,$tmp0
and $tmp4,$in1,$tmp0
dsrl $in0,8
dsrl $in1,8
dsll $tmp2,8
dsll $tmp4,8
and $in0,$tmp0
and $in1,$tmp0
or $tmp1,$tmp2
or $tmp3,$tmp4
or $in0,$tmp1
or $in1,$tmp3
dsrl $tmp1,$in0,32
dsrl $tmp3,$in1,32
dsll $in0,32
dsll $in1,32
or $in0,$tmp1
or $in1,$tmp3
# endif
#endif
daddu $h0,$in0 # accumulate input
daddu $h1,$in1
sltu $tmp0,$h0,$in0
sltu $tmp1,$h1,$in1
daddu $h1,$tmp0
dmultu ($r0,$h0) # h0*r0
daddu $h2,$padbit
sltu $tmp0,$h1,$tmp0
mflo ($d0,$r0,$h0)
mfhi ($d1,$r0,$h0)
dmultu ($s1,$h1) # h1*5*r1
daddu $tmp0,$tmp1
daddu $h2,$tmp0
mflo ($tmp0,$s1,$h1)
mfhi ($tmp1,$s1,$h1)
dmultu ($r1,$h0) # h0*r1
daddu $d0,$tmp0
daddu $d1,$tmp1
mflo ($tmp2,$r1,$h0)
mfhi ($d2,$r1,$h0)
sltu $tmp0,$d0,$tmp0
daddu $d1,$tmp0
dmultu ($r0,$h1) # h1*r0
daddu $d1,$tmp2
sltu $tmp2,$d1,$tmp2
mflo ($tmp0,$r0,$h1)
mfhi ($tmp1,$r0,$h1)
daddu $d2,$tmp2
dmultu ($s1,$h2) # h2*5*r1
daddu $d1,$tmp0
daddu $d2,$tmp1
mflo ($tmp2,$s1,$h2)
dmultu ($r0,$h2) # h2*r0
sltu $tmp0,$d1,$tmp0
daddu $d2,$tmp0
mflo ($tmp3,$r0,$h2)
daddu $d1,$tmp2
daddu $d2,$tmp3
sltu $tmp2,$d1,$tmp2
daddu $d2,$tmp2
li $tmp0,-4 # final reduction
and $tmp0,$d2
dsrl $tmp1,$d2,2
andi $h2,$d2,3
daddu $tmp0,$tmp1
daddu $h0,$d0,$tmp0
sltu $tmp0,$h0,$tmp0
daddu $h1,$d1,$tmp0
sltu $tmp0,$h1,$tmp0
daddu $h2,$h2,$tmp0
bnez $len,.Loop
sd $h0,0($ctx) # store hash value
sd $h1,8($ctx)
sd $h2,16($ctx)
.set noreorder
ld $s5,40($sp) # epilogue
ld $s4,32($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue
ld $s3,24($sp)
ld $s2,16($sp)
ld $s1,8($sp)
ld $s0,0($sp)
___
$code.=<<___;
jr $ra
daddu $sp,6*8
.end poly1305_blocks_internal
___
}
{
my ($ctx,$mac,$nonce) = ($a0,$a1,$a2);
$code.=<<___;
.align 5
.globl poly1305_emit
.ent poly1305_emit
poly1305_emit:
.frame $sp,0,$ra
.set reorder
ld $tmp0,0($ctx)
ld $tmp1,8($ctx)
ld $tmp2,16($ctx)
daddiu $in0,$tmp0,5 # compare to modulus
sltiu $tmp3,$in0,5
daddu $in1,$tmp1,$tmp3
sltu $tmp3,$in1,$tmp3
daddu $tmp2,$tmp2,$tmp3
dsrl $tmp2,2 # see if it carried/borrowed
dsubu $tmp2,$zero,$tmp2
nor $tmp3,$zero,$tmp2
and $in0,$tmp2
and $tmp0,$tmp3
and $in1,$tmp2
and $tmp1,$tmp3
or $in0,$tmp0
or $in1,$tmp1
lwu $tmp0,0($nonce) # load nonce
lwu $tmp1,4($nonce)
lwu $tmp2,8($nonce)
lwu $tmp3,12($nonce)
dsll $tmp1,32
dsll $tmp3,32
or $tmp0,$tmp1
or $tmp2,$tmp3
daddu $in0,$tmp0 # accumulate nonce
daddu $in1,$tmp2
sltu $tmp0,$in0,$tmp0
daddu $in1,$tmp0
dsrl $tmp0,$in0,8 # write mac value
dsrl $tmp1,$in0,16
dsrl $tmp2,$in0,24
sb $in0,0($mac)
dsrl $tmp3,$in0,32
sb $tmp0,1($mac)
dsrl $tmp0,$in0,40
sb $tmp1,2($mac)
dsrl $tmp1,$in0,48
sb $tmp2,3($mac)
dsrl $tmp2,$in0,56
sb $tmp3,4($mac)
dsrl $tmp3,$in1,8
sb $tmp0,5($mac)
dsrl $tmp0,$in1,16
sb $tmp1,6($mac)
dsrl $tmp1,$in1,24
sb $tmp2,7($mac)
sb $in1,8($mac)
dsrl $tmp2,$in1,32
sb $tmp3,9($mac)
dsrl $tmp3,$in1,40
sb $tmp0,10($mac)
dsrl $tmp0,$in1,48
sb $tmp1,11($mac)
dsrl $tmp1,$in1,56
sb $tmp2,12($mac)
sb $tmp3,13($mac)
sb $tmp0,14($mac)
sb $tmp1,15($mac)
jr $ra
.end poly1305_emit
.rdata
.align 2
___
}
open SELF,$0;
while(<SELF>) {
next if (/^#!/);
last if (!s/^#/\/\// and !/^$/);
print;
}
close SELF;
$output=pop and open STDOUT,">$output";
print $code;
close STDOUT;

View File

@ -0,0 +1,171 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifdef __linux__
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/intel-family.h>
#else
#include <sys/simd-x86_64.h>
#endif
asmlinkage void poly1305_init_x86_64(void *ctx,
const u8 key[POLY1305_KEY_SIZE]);
asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp,
const size_t len, const u32 padbit);
asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_MAC_SIZE],
const u32 nonce[4]);
asmlinkage void poly1305_emit_avx(void *ctx, u8 mac[POLY1305_MAC_SIZE],
const u32 nonce[4]);
asmlinkage void poly1305_blocks_avx(void *ctx, const u8 *inp, const size_t len,
const u32 padbit);
asmlinkage void poly1305_blocks_avx2(void *ctx, const u8 *inp, const size_t len,
const u32 padbit);
asmlinkage void poly1305_blocks_avx512(void *ctx, const u8 *inp,
const size_t len, const u32 padbit);
static bool poly1305_use_avx __ro_after_init;
static bool poly1305_use_avx2 __ro_after_init;
static bool poly1305_use_avx512 __ro_after_init;
static bool *const poly1305_nobs[] __initconst = {
&poly1305_use_avx, &poly1305_use_avx2, &poly1305_use_avx512 };
static void __init poly1305_fpu_init(void)
{
#ifdef __linux__
poly1305_use_avx =
boot_cpu_has(X86_FEATURE_AVX) &&
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
poly1305_use_avx2 =
boot_cpu_has(X86_FEATURE_AVX) &&
boot_cpu_has(X86_FEATURE_AVX2) &&
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
#ifndef COMPAT_CANNOT_USE_AVX512
poly1305_use_avx512 =
boot_cpu_has(X86_FEATURE_AVX) &&
boot_cpu_has(X86_FEATURE_AVX2) &&
boot_cpu_has(X86_FEATURE_AVX512F) &&
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
XFEATURE_MASK_AVX512, NULL) &&
/* Skylake downclocks unacceptably much when using zmm. */
boot_cpu_data.x86_model != INTEL_FAM6_SKYLAKE_X;
#endif
#else
poly1305_use_avx = !!(cpu_feature2 & CPUID2_AVX) &&
__ymm_enabled();
poly1305_use_avx2 = poly1305_use_avx &&
!!(cpu_stdext_feature & CPUID_STDEXT_AVX2);
poly1305_use_avx512 = poly1305_use_avx2 &&
!!(cpu_stdext_feature & CPUID_STDEXT_AVX512F) &&
__zmm_enabled();
#endif
}
static inline bool poly1305_init_arch(void *ctx,
const u8 key[POLY1305_KEY_SIZE])
{
poly1305_init_x86_64(ctx, key);
return true;
}
struct poly1305_arch_internal {
union {
struct {
u32 h[5];
u32 is_base2_26;
};
u64 hs[3];
};
u64 r[2];
u64 pad;
struct { u32 r2, r1, r4, r3; } rn[9];
};
/* The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit
* the unfortunate situation of using AVX and then having to go back to scalar
* -- because the user is silly and has called the update function from two
* separate contexts -- then we need to convert back to the original base before
* proceeding. It is possible to reason that the initial reduction below is
* sufficient given the implementation invariants. However, for an avoidance of
* doubt and because this is not performance critical, we do the full reduction
* anyway.
*/
static void convert_to_base2_64(void *ctx)
{
struct poly1305_arch_internal *state = ctx;
u32 cy;
if (!state->is_base2_26)
return;
cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy;
cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy;
cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy;
cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy;
state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0];
state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12);
state->hs[2] = state->h[4] >> 24;
#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL);
state->hs[2] &= 3;
state->hs[0] += cy;
state->hs[1] += (cy = ULT(state->hs[0], cy));
state->hs[2] += ULT(state->hs[1], cy);
#undef ULT
state->is_base2_26 = 0;
}
static inline bool poly1305_blocks_arch(void *ctx, const u8 *inp,
size_t len, const u32 padbit,
simd_context_t *simd_context)
{
struct poly1305_arch_internal *state = ctx;
/* SIMD disables preemption, so relax after processing each page. */
BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE ||
PAGE_SIZE % POLY1305_BLOCK_SIZE);
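	/* Heuristic: short inputs whose state is still in base 2^64 stay on
	 * the scalar path, avoiding the SIMD entry and base-conversion
	 * costs (the 18-block threshold was presumably tuned empirically). */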
if (!poly1305_use_avx ||
(len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) ||
!simd_use(simd_context)) {
convert_to_base2_64(ctx);
poly1305_blocks_x86_64(ctx, inp, len, padbit);
return true;
}
for (;;) {
const size_t bytes = min_t(size_t, len, PAGE_SIZE);
if (poly1305_use_avx512)
poly1305_blocks_avx512(ctx, inp, bytes, padbit);
else if (poly1305_use_avx2)
poly1305_blocks_avx2(ctx, inp, bytes, padbit);
else
poly1305_blocks_avx(ctx, inp, bytes, padbit);
len -= bytes;
if (!len)
break;
inp += bytes;
simd_relax(simd_context);
}
return true;
}
static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
const u32 nonce[4],
simd_context_t *simd_context)
{
struct poly1305_arch_internal *state = ctx;
if (!IS_ENABLED(CONFIG_AS_AVX) || !poly1305_use_avx ||
!state->is_base2_26 || !simd_use(simd_context)) {
convert_to_base2_64(ctx);
poly1305_emit_x86_64(ctx, mac, nonce);
} else
poly1305_emit_avx(ctx, mac, nonce);
return true;
}

File diff suppressed because it is too large

View File

@ -0,0 +1,163 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*
* Implementation of the Poly1305 message authenticator.
*
* Information: https://cr.yp.to/mac.html
*/
#include <sys/support.h>
#include <zinc/poly1305.h>
#include "../selftest/run.h"
#if defined(CONFIG_ZINC_ARCH_X86_64)
#include "poly1305-x86_64-glue.c"
#elif defined(CONFIG_ZINC_ARCH_ARM) || defined(CONFIG_ZINC_ARCH_ARM64)
#include "poly1305-arm-glue.c"
#elif defined(CONFIG_ZINC_ARCH_MIPS) || defined(CONFIG_ZINC_ARCH_MIPS64)
#include "poly1305-mips-glue.c"
#else
static inline bool poly1305_init_arch(void *ctx,
const u8 key[POLY1305_KEY_SIZE])
{
return false;
}
static inline bool poly1305_blocks_arch(void *ctx, const u8 *input,
size_t len, const u32 padbit,
simd_context_t *simd_context)
{
return false;
}
static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
const u32 nonce[4],
simd_context_t *simd_context)
{
return false;
}
static bool *const poly1305_nobs[] __initconst = { };
static void __init poly1305_fpu_init(void)
{
}
#endif
#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
#include "poly1305-donna64.c"
#else
#include "poly1305-donna32.c"
#endif
void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE])
{
ctx->nonce[0] = get_unaligned_le32(&key[16]);
ctx->nonce[1] = get_unaligned_le32(&key[20]);
ctx->nonce[2] = get_unaligned_le32(&key[24]);
ctx->nonce[3] = get_unaligned_le32(&key[28]);
if (!poly1305_init_arch(ctx->opaque, key))
poly1305_init_generic(ctx->opaque, key);
ctx->num = 0;
}
EXPORT_SYMBOL(poly1305_init);
static inline void poly1305_blocks(void *ctx, const u8 *input, const size_t len,
const u32 padbit,
simd_context_t *simd_context)
{
if (!poly1305_blocks_arch(ctx, input, len, padbit, simd_context))
poly1305_blocks_generic(ctx, input, len, padbit);
}
static inline void poly1305_emit(void *ctx, u8 mac[POLY1305_MAC_SIZE],
const u32 nonce[4],
simd_context_t *simd_context)
{
if (!poly1305_emit_arch(ctx, mac, nonce, simd_context))
poly1305_emit_generic(ctx, mac, nonce);
}
void poly1305_update(struct poly1305_ctx *ctx, const u8 *input, size_t len,
simd_context_t *simd_context)
{
const size_t num = ctx->num;
size_t rem;
if (num) {
rem = POLY1305_BLOCK_SIZE - num;
if (len < rem) {
memcpy(ctx->data + num, input, len);
ctx->num = num + len;
return;
}
memcpy(ctx->data + num, input, rem);
poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 1,
simd_context);
input += rem;
len -= rem;
}
rem = len % POLY1305_BLOCK_SIZE;
len -= rem;
if (len >= POLY1305_BLOCK_SIZE) {
poly1305_blocks(ctx->opaque, input, len, 1, simd_context);
input += len;
}
if (rem)
memcpy(ctx->data, input, rem);
ctx->num = rem;
}
EXPORT_SYMBOL(poly1305_update);
void poly1305_final(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE],
simd_context_t *simd_context)
{
size_t num = ctx->num;
if (num) {
ctx->data[num++] = 1;
while (num < POLY1305_BLOCK_SIZE)
ctx->data[num++] = 0;
poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 0,
simd_context);
}
poly1305_emit(ctx->opaque, mac, ctx->nonce, simd_context);
memzero_explicit(ctx, sizeof(*ctx));
}
EXPORT_SYMBOL(poly1305_final);
#include "../selftest/poly1305.c"
static bool nosimd __initdata = false;
#ifndef COMPAT_ZINC_IS_A_MODULE
int __init poly1305_mod_init(void)
#else
static int __init mod_init(void)
#endif
{
if (!nosimd)
poly1305_fpu_init();
if (!selftest_run("poly1305", poly1305_selftest, poly1305_nobs,
ARRAY_SIZE(poly1305_nobs)))
return -ENOTRECOVERABLE;
return 0;
}
#ifdef COMPAT_ZINC_IS_A_MODULE
static void __exit mod_exit(void)
{
}
module_param(nosimd, bool, 0);
module_init(mod_init);
module_exit(mod_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Poly1305 one-time authenticator");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
#endif

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,43 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _ZINC_SELFTEST_RUN_H
#define _ZINC_SELFTEST_RUN_H
static inline bool selftest_run(const char *name, bool (*selftest)(void),
bool *const nobs[], unsigned int nobs_len)
{
unsigned long set = 0, subset = 0, largest_subset = 0;
unsigned int i;
bool failed;
MPASS(nobs_len <= BITS_PER_LONG);
failed = false;
for (i = 0; i < nobs_len; ++i)
set |= ((unsigned long)*nobs[i]) << i;
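/*
 * Enumerate every subset of the enabled-nob bitmask with the standard
 * "subset = (subset - set) & set" trick, running the self-test once
 * for each combination of architecture/SIMD toggles.
 */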
do {
for (i = 0; i < nobs_len; ++i)
*nobs[i] = BIT(i) & subset;
if (selftest())
largest_subset = max(subset, largest_subset);
else {
failed = true;
pr_err("%s self-test combination 0x%lx: FAIL\n", name,
subset);
}
subset = (subset - set) & set;
} while (subset);
for (i = 0; i < nobs_len; ++i)
*nobs[i] = BIT(i) & largest_subset;
if (largest_subset == set && !failed && bootverbose)
pr_info("%s self-tests: pass\n", name);
return !WARN_ON(largest_subset != set);
}
#endif

View File

@ -0,0 +1,866 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2016 The fiat-crypto Authors.
* Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*
* This is a machine-generated formally verified implementation of Curve25519
* ECDH from: <https://github.com/mit-plv/fiat-crypto>. Though originally
* machine generated, it has been tweaked to be suitable for use in the kernel.
* It is optimized for 32-bit machines and machines that cannot work efficiently
* with 128-bit integer types.
*/
/* Added for compatibility */
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/support.h>
#include <crypto/curve25519.h>
/* fe means field element. Here the field is \Z/(2^255-19). An element t,
* entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
* t[3]+2^102 t[4]+...+2^230 t[9].
* fe limbs are bounded by 1.125*2^26,1.125*2^25,1.125*2^26,1.125*2^25,etc.
* Multiplication and carrying produce fe from fe_loose.
*/
typedef struct fe { u32 v[10]; } fe;
/* fe_loose limbs are bounded by 3.375*2^26,3.375*2^25,3.375*2^26,3.375*2^25,etc
* Addition and subtraction produce fe_loose from (fe, fe).
*/
typedef struct fe_loose { u32 v[10]; } fe_loose;
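/* Example: the integer 1 is represented as v = {1,0,...,0} and 2^26 as
 * v = {0,1,0,...,0}, since limb i carries weight 2^ceil(25.5*i) in this
 * radix-2^25.5 representation.
 */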
static __always_inline void fe_frombytes_impl(u32 h[10], const u8 *s)
{
/* Ignores top bit of s. */
u32 a0 = get_unaligned_le32(s);
u32 a1 = get_unaligned_le32(s+4);
u32 a2 = get_unaligned_le32(s+8);
u32 a3 = get_unaligned_le32(s+12);
u32 a4 = get_unaligned_le32(s+16);
u32 a5 = get_unaligned_le32(s+20);
u32 a6 = get_unaligned_le32(s+24);
u32 a7 = get_unaligned_le32(s+28);
h[0] = a0&((1<<26)-1); /* 26 used, 32-26 left. 26 */
h[1] = (a0>>26) | ((a1&((1<<19)-1))<< 6); /* (32-26) + 19 = 6+19 = 25 */
h[2] = (a1>>19) | ((a2&((1<<13)-1))<<13); /* (32-19) + 13 = 13+13 = 26 */
h[3] = (a2>>13) | ((a3&((1<< 6)-1))<<19); /* (32-13) + 6 = 19+ 6 = 25 */
h[4] = (a3>> 6); /* (32- 6) = 26 */
h[5] = a4&((1<<25)-1); /* 25 */
h[6] = (a4>>25) | ((a5&((1<<19)-1))<< 7); /* (32-25) + 19 = 7+19 = 26 */
h[7] = (a5>>19) | ((a6&((1<<12)-1))<<13); /* (32-19) + 12 = 13+12 = 25 */
h[8] = (a6>>12) | ((a7&((1<< 6)-1))<<20); /* (32-12) + 6 = 20+ 6 = 26 */
h[9] = (a7>> 6)&((1<<25)-1); /* 25 */
}
static __always_inline void fe_frombytes(fe *h, const u8 *s)
{
fe_frombytes_impl(h->v, s);
}
static __always_inline u8 /*bool*/
addcarryx_u25(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
{
/* This function extracts 25 bits of result and 1 bit of carry
* (26 total), so a 32-bit intermediate is sufficient.
*/
u32 x = a + b + c;
*low = x & ((1 << 25) - 1);
return (x >> 25) & 1;
}
static __always_inline u8 /*bool*/
addcarryx_u26(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
{
/* This function extracts 26 bits of result and 1 bit of carry
* (27 total), so a 32-bit intermediate is sufficient.
*/
u32 x = a + b + c;
*low = x & ((1 << 26) - 1);
return (x >> 26) & 1;
}
static __always_inline u8 /*bool*/
subborrow_u25(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
{
/* This function extracts 25 bits of result and 1 bit of borrow
* (26 total), so a 32-bit intermediate is sufficient.
*/
u32 x = a - b - c;
*low = x & ((1 << 25) - 1);
return x >> 31;
}
static __always_inline u8 /*bool*/
subborrow_u26(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
{
/* This function extracts 26 bits of result and 1 bit of borrow
* (27 total), so a 32-bit intermediate is sufficient.
*/
u32 x = a - b - c;
*low = x & ((1 << 26) - 1);
return x >> 31;
}
static __always_inline u32 cmovznz32(u32 t, u32 z, u32 nz)
{
t = -!!t; /* all set if nonzero, 0 if 0 */
return (t&nz) | ((~t)&z);
}
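/* fe_freeze canonicalizes mod p = 2^255 - 19: it subtracts p limb-wise
 * (0x3ffffed is 2^26 - 19, the low limb of p) and, if that subtraction
 * borrows, the input was already < p, so p is added back under a
 * constant-time mask produced by cmovznz32.
 */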
static __always_inline void fe_freeze(u32 out[10], const u32 in1[10])
{
{ const u32 x17 = in1[9];
{ const u32 x18 = in1[8];
{ const u32 x16 = in1[7];
{ const u32 x14 = in1[6];
{ const u32 x12 = in1[5];
{ const u32 x10 = in1[4];
{ const u32 x8 = in1[3];
{ const u32 x6 = in1[2];
{ const u32 x4 = in1[1];
{ const u32 x2 = in1[0];
{ u32 x20; u8/*bool*/ x21 = subborrow_u26(0x0, x2, 0x3ffffed, &x20);
{ u32 x23; u8/*bool*/ x24 = subborrow_u25(x21, x4, 0x1ffffff, &x23);
{ u32 x26; u8/*bool*/ x27 = subborrow_u26(x24, x6, 0x3ffffff, &x26);
{ u32 x29; u8/*bool*/ x30 = subborrow_u25(x27, x8, 0x1ffffff, &x29);
{ u32 x32; u8/*bool*/ x33 = subborrow_u26(x30, x10, 0x3ffffff, &x32);
{ u32 x35; u8/*bool*/ x36 = subborrow_u25(x33, x12, 0x1ffffff, &x35);
{ u32 x38; u8/*bool*/ x39 = subborrow_u26(x36, x14, 0x3ffffff, &x38);
{ u32 x41; u8/*bool*/ x42 = subborrow_u25(x39, x16, 0x1ffffff, &x41);
{ u32 x44; u8/*bool*/ x45 = subborrow_u26(x42, x18, 0x3ffffff, &x44);
{ u32 x47; u8/*bool*/ x48 = subborrow_u25(x45, x17, 0x1ffffff, &x47);
{ u32 x49 = cmovznz32(x48, 0x0, 0xffffffff);
{ u32 x50 = (x49 & 0x3ffffed);
{ u32 x52; u8/*bool*/ x53 = addcarryx_u26(0x0, x20, x50, &x52);
{ u32 x54 = (x49 & 0x1ffffff);
{ u32 x56; u8/*bool*/ x57 = addcarryx_u25(x53, x23, x54, &x56);
{ u32 x58 = (x49 & 0x3ffffff);
{ u32 x60; u8/*bool*/ x61 = addcarryx_u26(x57, x26, x58, &x60);
{ u32 x62 = (x49 & 0x1ffffff);
{ u32 x64; u8/*bool*/ x65 = addcarryx_u25(x61, x29, x62, &x64);
{ u32 x66 = (x49 & 0x3ffffff);
{ u32 x68; u8/*bool*/ x69 = addcarryx_u26(x65, x32, x66, &x68);
{ u32 x70 = (x49 & 0x1ffffff);
{ u32 x72; u8/*bool*/ x73 = addcarryx_u25(x69, x35, x70, &x72);
{ u32 x74 = (x49 & 0x3ffffff);
{ u32 x76; u8/*bool*/ x77 = addcarryx_u26(x73, x38, x74, &x76);
{ u32 x78 = (x49 & 0x1ffffff);
{ u32 x80; u8/*bool*/ x81 = addcarryx_u25(x77, x41, x78, &x80);
{ u32 x82 = (x49 & 0x3ffffff);
{ u32 x84; u8/*bool*/ x85 = addcarryx_u26(x81, x44, x82, &x84);
{ u32 x86 = (x49 & 0x1ffffff);
{ u32 x88; addcarryx_u25(x85, x47, x86, &x88);
out[0] = x52;
out[1] = x56;
out[2] = x60;
out[3] = x64;
out[4] = x68;
out[5] = x72;
out[6] = x76;
out[7] = x80;
out[8] = x84;
out[9] = x88;
}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
static __always_inline void fe_tobytes(u8 s[32], const fe *f)
{
u32 h[10];
fe_freeze(h, f->v);
s[0] = h[0] >> 0;
s[1] = h[0] >> 8;
s[2] = h[0] >> 16;
s[3] = (h[0] >> 24) | (h[1] << 2);
s[4] = h[1] >> 6;
s[5] = h[1] >> 14;
s[6] = (h[1] >> 22) | (h[2] << 3);
s[7] = h[2] >> 5;
s[8] = h[2] >> 13;
s[9] = (h[2] >> 21) | (h[3] << 5);
s[10] = h[3] >> 3;
s[11] = h[3] >> 11;
s[12] = (h[3] >> 19) | (h[4] << 6);
s[13] = h[4] >> 2;
s[14] = h[4] >> 10;
s[15] = h[4] >> 18;
s[16] = h[5] >> 0;
s[17] = h[5] >> 8;
s[18] = h[5] >> 16;
s[19] = (h[5] >> 24) | (h[6] << 1);
s[20] = h[6] >> 7;
s[21] = h[6] >> 15;
s[22] = (h[6] >> 23) | (h[7] << 3);
s[23] = h[7] >> 5;
s[24] = h[7] >> 13;
s[25] = (h[7] >> 21) | (h[8] << 4);
s[26] = h[8] >> 4;
s[27] = h[8] >> 12;
s[28] = (h[8] >> 20) | (h[9] << 6);
s[29] = h[9] >> 2;
s[30] = h[9] >> 10;
s[31] = h[9] >> 18;
}
/* h = f */
static __always_inline void fe_copy(fe *h, const fe *f)
{
memmove(h, f, sizeof(u32) * 10);
}
static __always_inline void fe_copy_lt(fe_loose *h, const fe *f)
{
memmove(h, f, sizeof(u32) * 10);
}
/* h = 0 */
static __always_inline void fe_0(fe *h)
{
memset(h, 0, sizeof(u32) * 10);
}
/* h = 1 */
static __always_inline void fe_1(fe *h)
{
memset(h, 0, sizeof(u32) * 10);
h->v[0] = 1;
}
static void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
{
{ const u32 x20 = in1[9];
{ const u32 x21 = in1[8];
{ const u32 x19 = in1[7];
{ const u32 x17 = in1[6];
{ const u32 x15 = in1[5];
{ const u32 x13 = in1[4];
{ const u32 x11 = in1[3];
{ const u32 x9 = in1[2];
{ const u32 x7 = in1[1];
{ const u32 x5 = in1[0];
{ const u32 x38 = in2[9];
{ const u32 x39 = in2[8];
{ const u32 x37 = in2[7];
{ const u32 x35 = in2[6];
{ const u32 x33 = in2[5];
{ const u32 x31 = in2[4];
{ const u32 x29 = in2[3];
{ const u32 x27 = in2[2];
{ const u32 x25 = in2[1];
{ const u32 x23 = in2[0];
out[0] = (x5 + x23);
out[1] = (x7 + x25);
out[2] = (x9 + x27);
out[3] = (x11 + x29);
out[4] = (x13 + x31);
out[5] = (x15 + x33);
out[6] = (x17 + x35);
out[7] = (x19 + x37);
out[8] = (x21 + x39);
out[9] = (x20 + x38);
}}}}}}}}}}}}}}}}}}}}
}
/* h = f + g
* Can overlap h with f or g.
*/
static __always_inline void fe_add(fe_loose *h, const fe *f, const fe *g)
{
fe_add_impl(h->v, f->v, g->v);
}
static void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
{
{ const u32 x20 = in1[9];
{ const u32 x21 = in1[8];
{ const u32 x19 = in1[7];
{ const u32 x17 = in1[6];
{ const u32 x15 = in1[5];
{ const u32 x13 = in1[4];
{ const u32 x11 = in1[3];
{ const u32 x9 = in1[2];
{ const u32 x7 = in1[1];
{ const u32 x5 = in1[0];
{ const u32 x38 = in2[9];
{ const u32 x39 = in2[8];
{ const u32 x37 = in2[7];
{ const u32 x35 = in2[6];
{ const u32 x33 = in2[5];
{ const u32 x31 = in2[4];
{ const u32 x29 = in2[3];
{ const u32 x27 = in2[2];
{ const u32 x25 = in2[1];
{ const u32 x23 = in2[0];
out[0] = ((0x7ffffda + x5) - x23);
out[1] = ((0x3fffffe + x7) - x25);
out[2] = ((0x7fffffe + x9) - x27);
out[3] = ((0x3fffffe + x11) - x29);
out[4] = ((0x7fffffe + x13) - x31);
out[5] = ((0x3fffffe + x15) - x33);
out[6] = ((0x7fffffe + x17) - x35);
out[7] = ((0x3fffffe + x19) - x37);
out[8] = ((0x7fffffe + x21) - x39);
out[9] = ((0x3fffffe + x20) - x38);
}}}}}}}}}}}}}}}}}}}}
}
/* h = f - g
* Can overlap h with f or g.
*/
static __always_inline void fe_sub(fe_loose *h, const fe *f, const fe *g)
{
fe_sub_impl(h->v, f->v, g->v);
}
static void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
{
{ const u32 x20 = in1[9];
{ const u32 x21 = in1[8];
{ const u32 x19 = in1[7];
{ const u32 x17 = in1[6];
{ const u32 x15 = in1[5];
{ const u32 x13 = in1[4];
{ const u32 x11 = in1[3];
{ const u32 x9 = in1[2];
{ const u32 x7 = in1[1];
{ const u32 x5 = in1[0];
{ const u32 x38 = in2[9];
{ const u32 x39 = in2[8];
{ const u32 x37 = in2[7];
{ const u32 x35 = in2[6];
{ const u32 x33 = in2[5];
{ const u32 x31 = in2[4];
{ const u32 x29 = in2[3];
{ const u32 x27 = in2[2];
{ const u32 x25 = in2[1];
{ const u32 x23 = in2[0];
{ u64 x40 = ((u64)x23 * x5);
{ u64 x41 = (((u64)x23 * x7) + ((u64)x25 * x5));
{ u64 x42 = ((((u64)(0x2 * x25) * x7) + ((u64)x23 * x9)) + ((u64)x27 * x5));
{ u64 x43 = (((((u64)x25 * x9) + ((u64)x27 * x7)) + ((u64)x23 * x11)) + ((u64)x29 * x5));
{ u64 x44 = (((((u64)x27 * x9) + (0x2 * (((u64)x25 * x11) + ((u64)x29 * x7)))) + ((u64)x23 * x13)) + ((u64)x31 * x5));
{ u64 x45 = (((((((u64)x27 * x11) + ((u64)x29 * x9)) + ((u64)x25 * x13)) + ((u64)x31 * x7)) + ((u64)x23 * x15)) + ((u64)x33 * x5));
{ u64 x46 = (((((0x2 * ((((u64)x29 * x11) + ((u64)x25 * x15)) + ((u64)x33 * x7))) + ((u64)x27 * x13)) + ((u64)x31 * x9)) + ((u64)x23 * x17)) + ((u64)x35 * x5));
{ u64 x47 = (((((((((u64)x29 * x13) + ((u64)x31 * x11)) + ((u64)x27 * x15)) + ((u64)x33 * x9)) + ((u64)x25 * x17)) + ((u64)x35 * x7)) + ((u64)x23 * x19)) + ((u64)x37 * x5));
{ u64 x48 = (((((((u64)x31 * x13) + (0x2 * (((((u64)x29 * x15) + ((u64)x33 * x11)) + ((u64)x25 * x19)) + ((u64)x37 * x7)))) + ((u64)x27 * x17)) + ((u64)x35 * x9)) + ((u64)x23 * x21)) + ((u64)x39 * x5));
{ u64 x49 = (((((((((((u64)x31 * x15) + ((u64)x33 * x13)) + ((u64)x29 * x17)) + ((u64)x35 * x11)) + ((u64)x27 * x19)) + ((u64)x37 * x9)) + ((u64)x25 * x21)) + ((u64)x39 * x7)) + ((u64)x23 * x20)) + ((u64)x38 * x5));
{ u64 x50 = (((((0x2 * ((((((u64)x33 * x15) + ((u64)x29 * x19)) + ((u64)x37 * x11)) + ((u64)x25 * x20)) + ((u64)x38 * x7))) + ((u64)x31 * x17)) + ((u64)x35 * x13)) + ((u64)x27 * x21)) + ((u64)x39 * x9));
{ u64 x51 = (((((((((u64)x33 * x17) + ((u64)x35 * x15)) + ((u64)x31 * x19)) + ((u64)x37 * x13)) + ((u64)x29 * x21)) + ((u64)x39 * x11)) + ((u64)x27 * x20)) + ((u64)x38 * x9));
{ u64 x52 = (((((u64)x35 * x17) + (0x2 * (((((u64)x33 * x19) + ((u64)x37 * x15)) + ((u64)x29 * x20)) + ((u64)x38 * x11)))) + ((u64)x31 * x21)) + ((u64)x39 * x13));
{ u64 x53 = (((((((u64)x35 * x19) + ((u64)x37 * x17)) + ((u64)x33 * x21)) + ((u64)x39 * x15)) + ((u64)x31 * x20)) + ((u64)x38 * x13));
{ u64 x54 = (((0x2 * ((((u64)x37 * x19) + ((u64)x33 * x20)) + ((u64)x38 * x15))) + ((u64)x35 * x21)) + ((u64)x39 * x17));
{ u64 x55 = (((((u64)x37 * x21) + ((u64)x39 * x19)) + ((u64)x35 * x20)) + ((u64)x38 * x17));
{ u64 x56 = (((u64)x39 * x21) + (0x2 * (((u64)x37 * x20) + ((u64)x38 * x19))));
{ u64 x57 = (((u64)x39 * x20) + ((u64)x38 * x21));
{ u64 x58 = ((u64)(0x2 * x38) * x20);
{ u64 x59 = (x48 + (x58 << 0x4));
{ u64 x60 = (x59 + (x58 << 0x1));
{ u64 x61 = (x60 + x58);
{ u64 x62 = (x47 + (x57 << 0x4));
{ u64 x63 = (x62 + (x57 << 0x1));
{ u64 x64 = (x63 + x57);
{ u64 x65 = (x46 + (x56 << 0x4));
{ u64 x66 = (x65 + (x56 << 0x1));
{ u64 x67 = (x66 + x56);
{ u64 x68 = (x45 + (x55 << 0x4));
{ u64 x69 = (x68 + (x55 << 0x1));
{ u64 x70 = (x69 + x55);
{ u64 x71 = (x44 + (x54 << 0x4));
{ u64 x72 = (x71 + (x54 << 0x1));
{ u64 x73 = (x72 + x54);
{ u64 x74 = (x43 + (x53 << 0x4));
{ u64 x75 = (x74 + (x53 << 0x1));
{ u64 x76 = (x75 + x53);
{ u64 x77 = (x42 + (x52 << 0x4));
{ u64 x78 = (x77 + (x52 << 0x1));
{ u64 x79 = (x78 + x52);
{ u64 x80 = (x41 + (x51 << 0x4));
{ u64 x81 = (x80 + (x51 << 0x1));
{ u64 x82 = (x81 + x51);
{ u64 x83 = (x40 + (x50 << 0x4));
{ u64 x84 = (x83 + (x50 << 0x1));
{ u64 x85 = (x84 + x50);
{ u64 x86 = (x85 >> 0x1a);
{ u32 x87 = ((u32)x85 & 0x3ffffff);
{ u64 x88 = (x86 + x82);
{ u64 x89 = (x88 >> 0x19);
{ u32 x90 = ((u32)x88 & 0x1ffffff);
{ u64 x91 = (x89 + x79);
{ u64 x92 = (x91 >> 0x1a);
{ u32 x93 = ((u32)x91 & 0x3ffffff);
{ u64 x94 = (x92 + x76);
{ u64 x95 = (x94 >> 0x19);
{ u32 x96 = ((u32)x94 & 0x1ffffff);
{ u64 x97 = (x95 + x73);
{ u64 x98 = (x97 >> 0x1a);
{ u32 x99 = ((u32)x97 & 0x3ffffff);
{ u64 x100 = (x98 + x70);
{ u64 x101 = (x100 >> 0x19);
{ u32 x102 = ((u32)x100 & 0x1ffffff);
{ u64 x103 = (x101 + x67);
{ u64 x104 = (x103 >> 0x1a);
{ u32 x105 = ((u32)x103 & 0x3ffffff);
{ u64 x106 = (x104 + x64);
{ u64 x107 = (x106 >> 0x19);
{ u32 x108 = ((u32)x106 & 0x1ffffff);
{ u64 x109 = (x107 + x61);
{ u64 x110 = (x109 >> 0x1a);
{ u32 x111 = ((u32)x109 & 0x3ffffff);
{ u64 x112 = (x110 + x49);
{ u64 x113 = (x112 >> 0x19);
{ u32 x114 = ((u32)x112 & 0x1ffffff);
{ u64 x115 = (x87 + (0x13 * x113));
{ u32 x116 = (u32) (x115 >> 0x1a);
{ u32 x117 = ((u32)x115 & 0x3ffffff);
{ u32 x118 = (x116 + x90);
{ u32 x119 = (x118 >> 0x19);
{ u32 x120 = (x118 & 0x1ffffff);
out[0] = x117;
out[1] = x120;
out[2] = (x119 + x93);
out[3] = x96;
out[4] = x99;
out[5] = x102;
out[6] = x105;
out[7] = x108;
out[8] = x111;
out[9] = x114;
}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
static __always_inline void fe_mul_ttt(fe *h, const fe *f, const fe *g)
{
fe_mul_impl(h->v, f->v, g->v);
}
static __always_inline void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g)
{
fe_mul_impl(h->v, f->v, g->v);
}
static __always_inline void
fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g)
{
fe_mul_impl(h->v, f->v, g->v);
}
static void fe_sqr_impl(u32 out[10], const u32 in1[10])
{
{ const u32 x17 = in1[9];
{ const u32 x18 = in1[8];
{ const u32 x16 = in1[7];
{ const u32 x14 = in1[6];
{ const u32 x12 = in1[5];
{ const u32 x10 = in1[4];
{ const u32 x8 = in1[3];
{ const u32 x6 = in1[2];
{ const u32 x4 = in1[1];
{ const u32 x2 = in1[0];
{ u64 x19 = ((u64)x2 * x2);
{ u64 x20 = ((u64)(0x2 * x2) * x4);
{ u64 x21 = (0x2 * (((u64)x4 * x4) + ((u64)x2 * x6)));
{ u64 x22 = (0x2 * (((u64)x4 * x6) + ((u64)x2 * x8)));
{ u64 x23 = ((((u64)x6 * x6) + ((u64)(0x4 * x4) * x8)) + ((u64)(0x2 * x2) * x10));
{ u64 x24 = (0x2 * ((((u64)x6 * x8) + ((u64)x4 * x10)) + ((u64)x2 * x12)));
{ u64 x25 = (0x2 * (((((u64)x8 * x8) + ((u64)x6 * x10)) + ((u64)x2 * x14)) + ((u64)(0x2 * x4) * x12)));
{ u64 x26 = (0x2 * (((((u64)x8 * x10) + ((u64)x6 * x12)) + ((u64)x4 * x14)) + ((u64)x2 * x16)));
{ u64 x27 = (((u64)x10 * x10) + (0x2 * ((((u64)x6 * x14) + ((u64)x2 * x18)) + (0x2 * (((u64)x4 * x16) + ((u64)x8 * x12))))));
{ u64 x28 = (0x2 * ((((((u64)x10 * x12) + ((u64)x8 * x14)) + ((u64)x6 * x16)) + ((u64)x4 * x18)) + ((u64)x2 * x17)));
{ u64 x29 = (0x2 * (((((u64)x12 * x12) + ((u64)x10 * x14)) + ((u64)x6 * x18)) + (0x2 * (((u64)x8 * x16) + ((u64)x4 * x17)))));
{ u64 x30 = (0x2 * (((((u64)x12 * x14) + ((u64)x10 * x16)) + ((u64)x8 * x18)) + ((u64)x6 * x17)));
{ u64 x31 = (((u64)x14 * x14) + (0x2 * (((u64)x10 * x18) + (0x2 * (((u64)x12 * x16) + ((u64)x8 * x17))))));
{ u64 x32 = (0x2 * ((((u64)x14 * x16) + ((u64)x12 * x18)) + ((u64)x10 * x17)));
{ u64 x33 = (0x2 * ((((u64)x16 * x16) + ((u64)x14 * x18)) + ((u64)(0x2 * x12) * x17)));
{ u64 x34 = (0x2 * (((u64)x16 * x18) + ((u64)x14 * x17)));
{ u64 x35 = (((u64)x18 * x18) + ((u64)(0x4 * x16) * x17));
{ u64 x36 = ((u64)(0x2 * x18) * x17);
{ u64 x37 = ((u64)(0x2 * x17) * x17);
{ u64 x38 = (x27 + (x37 << 0x4));
{ u64 x39 = (x38 + (x37 << 0x1));
{ u64 x40 = (x39 + x37);
{ u64 x41 = (x26 + (x36 << 0x4));
{ u64 x42 = (x41 + (x36 << 0x1));
{ u64 x43 = (x42 + x36);
{ u64 x44 = (x25 + (x35 << 0x4));
{ u64 x45 = (x44 + (x35 << 0x1));
{ u64 x46 = (x45 + x35);
{ u64 x47 = (x24 + (x34 << 0x4));
{ u64 x48 = (x47 + (x34 << 0x1));
{ u64 x49 = (x48 + x34);
{ u64 x50 = (x23 + (x33 << 0x4));
{ u64 x51 = (x50 + (x33 << 0x1));
{ u64 x52 = (x51 + x33);
{ u64 x53 = (x22 + (x32 << 0x4));
{ u64 x54 = (x53 + (x32 << 0x1));
{ u64 x55 = (x54 + x32);
{ u64 x56 = (x21 + (x31 << 0x4));
{ u64 x57 = (x56 + (x31 << 0x1));
{ u64 x58 = (x57 + x31);
{ u64 x59 = (x20 + (x30 << 0x4));
{ u64 x60 = (x59 + (x30 << 0x1));
{ u64 x61 = (x60 + x30);
{ u64 x62 = (x19 + (x29 << 0x4));
{ u64 x63 = (x62 + (x29 << 0x1));
{ u64 x64 = (x63 + x29);
{ u64 x65 = (x64 >> 0x1a);
{ u32 x66 = ((u32)x64 & 0x3ffffff);
{ u64 x67 = (x65 + x61);
{ u64 x68 = (x67 >> 0x19);
{ u32 x69 = ((u32)x67 & 0x1ffffff);
{ u64 x70 = (x68 + x58);
{ u64 x71 = (x70 >> 0x1a);
{ u32 x72 = ((u32)x70 & 0x3ffffff);
{ u64 x73 = (x71 + x55);
{ u64 x74 = (x73 >> 0x19);
{ u32 x75 = ((u32)x73 & 0x1ffffff);
{ u64 x76 = (x74 + x52);
{ u64 x77 = (x76 >> 0x1a);
{ u32 x78 = ((u32)x76 & 0x3ffffff);
{ u64 x79 = (x77 + x49);
{ u64 x80 = (x79 >> 0x19);
{ u32 x81 = ((u32)x79 & 0x1ffffff);
{ u64 x82 = (x80 + x46);
{ u64 x83 = (x82 >> 0x1a);
{ u32 x84 = ((u32)x82 & 0x3ffffff);
{ u64 x85 = (x83 + x43);
{ u64 x86 = (x85 >> 0x19);
{ u32 x87 = ((u32)x85 & 0x1ffffff);
{ u64 x88 = (x86 + x40);
{ u64 x89 = (x88 >> 0x1a);
{ u32 x90 = ((u32)x88 & 0x3ffffff);
{ u64 x91 = (x89 + x28);
{ u64 x92 = (x91 >> 0x19);
{ u32 x93 = ((u32)x91 & 0x1ffffff);
{ u64 x94 = (x66 + (0x13 * x92));
{ u32 x95 = (u32) (x94 >> 0x1a);
{ u32 x96 = ((u32)x94 & 0x3ffffff);
{ u32 x97 = (x95 + x69);
{ u32 x98 = (x97 >> 0x19);
{ u32 x99 = (x97 & 0x1ffffff);
out[0] = x96;
out[1] = x99;
out[2] = (x98 + x72);
out[3] = x75;
out[4] = x78;
out[5] = x81;
out[6] = x84;
out[7] = x87;
out[8] = x90;
out[9] = x93;
}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
static __always_inline void fe_sq_tl(fe *h, const fe_loose *f)
{
fe_sqr_impl(h->v, f->v);
}
static __always_inline void fe_sq_tt(fe *h, const fe *f)
{
fe_sqr_impl(h->v, f->v);
}
static __always_inline void fe_loose_invert(fe *out, const fe_loose *z)
{
fe t0;
fe t1;
fe t2;
fe t3;
int i;
fe_sq_tl(&t0, z);
fe_sq_tt(&t1, &t0);
for (i = 1; i < 2; ++i)
fe_sq_tt(&t1, &t1);
fe_mul_tlt(&t1, z, &t1);
fe_mul_ttt(&t0, &t0, &t1);
fe_sq_tt(&t2, &t0);
fe_mul_ttt(&t1, &t1, &t2);
fe_sq_tt(&t2, &t1);
for (i = 1; i < 5; ++i)
fe_sq_tt(&t2, &t2);
fe_mul_ttt(&t1, &t2, &t1);
fe_sq_tt(&t2, &t1);
for (i = 1; i < 10; ++i)
fe_sq_tt(&t2, &t2);
fe_mul_ttt(&t2, &t2, &t1);
fe_sq_tt(&t3, &t2);
for (i = 1; i < 20; ++i)
fe_sq_tt(&t3, &t3);
fe_mul_ttt(&t2, &t3, &t2);
fe_sq_tt(&t2, &t2);
for (i = 1; i < 10; ++i)
fe_sq_tt(&t2, &t2);
fe_mul_ttt(&t1, &t2, &t1);
fe_sq_tt(&t2, &t1);
for (i = 1; i < 50; ++i)
fe_sq_tt(&t2, &t2);
fe_mul_ttt(&t2, &t2, &t1);
fe_sq_tt(&t3, &t2);
for (i = 1; i < 100; ++i)
fe_sq_tt(&t3, &t3);
fe_mul_ttt(&t2, &t3, &t2);
fe_sq_tt(&t2, &t2);
for (i = 1; i < 50; ++i)
fe_sq_tt(&t2, &t2);
fe_mul_ttt(&t1, &t2, &t1);
fe_sq_tt(&t1, &t1);
for (i = 1; i < 5; ++i)
fe_sq_tt(&t1, &t1);
fe_mul_ttt(out, &t1, &t0);
}
static __always_inline void fe_invert(fe *out, const fe *z)
{
fe_loose l;
fe_copy_lt(&l, z);
fe_loose_invert(out, &l);
}
/* Replace (f,g) with (g,f) if b == 1;
* replace (f,g) with (f,g) if b == 0.
*
* Preconditions: b in {0,1}
*/
static __always_inline void fe_cswap(fe *f, fe *g, unsigned int b)
{
unsigned i;
b = 0 - b;
for (i = 0; i < 10; i++) {
u32 x = f->v[i] ^ g->v[i];
x &= b;
f->v[i] ^= x;
g->v[i] ^= x;
}
}
/* NOTE: based on fiat-crypto fe_mul, edited for in2=121666, 0, 0.*/
static __always_inline void fe_mul_121666_impl(u32 out[10], const u32 in1[10])
{
{ const u32 x20 = in1[9];
{ const u32 x21 = in1[8];
{ const u32 x19 = in1[7];
{ const u32 x17 = in1[6];
{ const u32 x15 = in1[5];
{ const u32 x13 = in1[4];
{ const u32 x11 = in1[3];
{ const u32 x9 = in1[2];
{ const u32 x7 = in1[1];
{ const u32 x5 = in1[0];
{ const u32 x38 = 0;
{ const u32 x39 = 0;
{ const u32 x37 = 0;
{ const u32 x35 = 0;
{ const u32 x33 = 0;
{ const u32 x31 = 0;
{ const u32 x29 = 0;
{ const u32 x27 = 0;
{ const u32 x25 = 0;
{ const u32 x23 = 121666;
{ u64 x40 = ((u64)x23 * x5);
{ u64 x41 = (((u64)x23 * x7) + ((u64)x25 * x5));
{ u64 x42 = ((((u64)(0x2 * x25) * x7) + ((u64)x23 * x9)) + ((u64)x27 * x5));
{ u64 x43 = (((((u64)x25 * x9) + ((u64)x27 * x7)) + ((u64)x23 * x11)) + ((u64)x29 * x5));
{ u64 x44 = (((((u64)x27 * x9) + (0x2 * (((u64)x25 * x11) + ((u64)x29 * x7)))) + ((u64)x23 * x13)) + ((u64)x31 * x5));
{ u64 x45 = (((((((u64)x27 * x11) + ((u64)x29 * x9)) + ((u64)x25 * x13)) + ((u64)x31 * x7)) + ((u64)x23 * x15)) + ((u64)x33 * x5));
{ u64 x46 = (((((0x2 * ((((u64)x29 * x11) + ((u64)x25 * x15)) + ((u64)x33 * x7))) + ((u64)x27 * x13)) + ((u64)x31 * x9)) + ((u64)x23 * x17)) + ((u64)x35 * x5));
{ u64 x47 = (((((((((u64)x29 * x13) + ((u64)x31 * x11)) + ((u64)x27 * x15)) + ((u64)x33 * x9)) + ((u64)x25 * x17)) + ((u64)x35 * x7)) + ((u64)x23 * x19)) + ((u64)x37 * x5));
{ u64 x48 = (((((((u64)x31 * x13) + (0x2 * (((((u64)x29 * x15) + ((u64)x33 * x11)) + ((u64)x25 * x19)) + ((u64)x37 * x7)))) + ((u64)x27 * x17)) + ((u64)x35 * x9)) + ((u64)x23 * x21)) + ((u64)x39 * x5));
{ u64 x49 = (((((((((((u64)x31 * x15) + ((u64)x33 * x13)) + ((u64)x29 * x17)) + ((u64)x35 * x11)) + ((u64)x27 * x19)) + ((u64)x37 * x9)) + ((u64)x25 * x21)) + ((u64)x39 * x7)) + ((u64)x23 * x20)) + ((u64)x38 * x5));
{ u64 x50 = (((((0x2 * ((((((u64)x33 * x15) + ((u64)x29 * x19)) + ((u64)x37 * x11)) + ((u64)x25 * x20)) + ((u64)x38 * x7))) + ((u64)x31 * x17)) + ((u64)x35 * x13)) + ((u64)x27 * x21)) + ((u64)x39 * x9));
{ u64 x51 = (((((((((u64)x33 * x17) + ((u64)x35 * x15)) + ((u64)x31 * x19)) + ((u64)x37 * x13)) + ((u64)x29 * x21)) + ((u64)x39 * x11)) + ((u64)x27 * x20)) + ((u64)x38 * x9));
{ u64 x52 = (((((u64)x35 * x17) + (0x2 * (((((u64)x33 * x19) + ((u64)x37 * x15)) + ((u64)x29 * x20)) + ((u64)x38 * x11)))) + ((u64)x31 * x21)) + ((u64)x39 * x13));
{ u64 x53 = (((((((u64)x35 * x19) + ((u64)x37 * x17)) + ((u64)x33 * x21)) + ((u64)x39 * x15)) + ((u64)x31 * x20)) + ((u64)x38 * x13));
{ u64 x54 = (((0x2 * ((((u64)x37 * x19) + ((u64)x33 * x20)) + ((u64)x38 * x15))) + ((u64)x35 * x21)) + ((u64)x39 * x17));
{ u64 x55 = (((((u64)x37 * x21) + ((u64)x39 * x19)) + ((u64)x35 * x20)) + ((u64)x38 * x17));
{ u64 x56 = (((u64)x39 * x21) + (0x2 * (((u64)x37 * x20) + ((u64)x38 * x19))));
{ u64 x57 = (((u64)x39 * x20) + ((u64)x38 * x21));
{ u64 x58 = ((u64)(0x2 * x38) * x20);
{ u64 x59 = (x48 + (x58 << 0x4));
{ u64 x60 = (x59 + (x58 << 0x1));
{ u64 x61 = (x60 + x58);
{ u64 x62 = (x47 + (x57 << 0x4));
{ u64 x63 = (x62 + (x57 << 0x1));
{ u64 x64 = (x63 + x57);
{ u64 x65 = (x46 + (x56 << 0x4));
{ u64 x66 = (x65 + (x56 << 0x1));
{ u64 x67 = (x66 + x56);
{ u64 x68 = (x45 + (x55 << 0x4));
{ u64 x69 = (x68 + (x55 << 0x1));
{ u64 x70 = (x69 + x55);
{ u64 x71 = (x44 + (x54 << 0x4));
{ u64 x72 = (x71 + (x54 << 0x1));
{ u64 x73 = (x72 + x54);
{ u64 x74 = (x43 + (x53 << 0x4));
{ u64 x75 = (x74 + (x53 << 0x1));
{ u64 x76 = (x75 + x53);
{ u64 x77 = (x42 + (x52 << 0x4));
{ u64 x78 = (x77 + (x52 << 0x1));
{ u64 x79 = (x78 + x52);
{ u64 x80 = (x41 + (x51 << 0x4));
{ u64 x81 = (x80 + (x51 << 0x1));
{ u64 x82 = (x81 + x51);
{ u64 x83 = (x40 + (x50 << 0x4));
{ u64 x84 = (x83 + (x50 << 0x1));
{ u64 x85 = (x84 + x50);
{ u64 x86 = (x85 >> 0x1a);
{ u32 x87 = ((u32)x85 & 0x3ffffff);
{ u64 x88 = (x86 + x82);
{ u64 x89 = (x88 >> 0x19);
{ u32 x90 = ((u32)x88 & 0x1ffffff);
{ u64 x91 = (x89 + x79);
{ u64 x92 = (x91 >> 0x1a);
{ u32 x93 = ((u32)x91 & 0x3ffffff);
{ u64 x94 = (x92 + x76);
{ u64 x95 = (x94 >> 0x19);
{ u32 x96 = ((u32)x94 & 0x1ffffff);
{ u64 x97 = (x95 + x73);
{ u64 x98 = (x97 >> 0x1a);
{ u32 x99 = ((u32)x97 & 0x3ffffff);
{ u64 x100 = (x98 + x70);
{ u64 x101 = (x100 >> 0x19);
{ u32 x102 = ((u32)x100 & 0x1ffffff);
{ u64 x103 = (x101 + x67);
{ u64 x104 = (x103 >> 0x1a);
{ u32 x105 = ((u32)x103 & 0x3ffffff);
{ u64 x106 = (x104 + x64);
{ u64 x107 = (x106 >> 0x19);
{ u32 x108 = ((u32)x106 & 0x1ffffff);
{ u64 x109 = (x107 + x61);
{ u64 x110 = (x109 >> 0x1a);
{ u32 x111 = ((u32)x109 & 0x3ffffff);
{ u64 x112 = (x110 + x49);
{ u64 x113 = (x112 >> 0x19);
{ u32 x114 = ((u32)x112 & 0x1ffffff);
{ u64 x115 = (x87 + (0x13 * x113));
{ u32 x116 = (u32) (x115 >> 0x1a);
{ u32 x117 = ((u32)x115 & 0x3ffffff);
{ u32 x118 = (x116 + x90);
{ u32 x119 = (x118 >> 0x19);
{ u32 x120 = (x118 & 0x1ffffff);
out[0] = x117;
out[1] = x120;
out[2] = (x119 + x93);
out[3] = x96;
out[4] = x99;
out[5] = x102;
out[6] = x105;
out[7] = x108;
out[8] = x111;
out[9] = x114;
}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
static __always_inline void fe_mul121666(fe *h, const fe_loose *f)
{
fe_mul_121666_impl(h->v, f->v);
}
void curve25519_generic(u8 out[CURVE25519_KEY_SIZE],
const u8 scalar[CURVE25519_KEY_SIZE],
const u8 point[CURVE25519_KEY_SIZE])
{
fe x1, x2, z2, x3, z3;
fe_loose x2l, z2l, x3l;
unsigned swap = 0;
int pos;
u8 e[32];
memcpy(e, scalar, 32);
/* The following implementation was transcribed to Coq and proven to
* correspond to unary scalar multiplication in affine coordinates given
* that x1 != 0 is the x coordinate of some point on the curve. It was
* also checked in Coq that doing a ladderstep with x1 = x3 = 0 gives
* z2' = z3' = 0, and z2 = z3 = 0 gives z2' = z3' = 0. The statement was
* quantified over the underlying field, so it applies to Curve25519
* itself and the quadratic twist of Curve25519. It was not proven in
* Coq that prime-field arithmetic correctly simulates extension-field
* arithmetic on prime-field values. The decoding of the byte array
* representation of e was not considered.
*
* Specification of Montgomery curves in affine coordinates:
* <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Spec/MontgomeryCurve.v#L27>
*
* Proof that these form a group that is isomorphic to a Weierstrass
* curve:
* <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/AffineProofs.v#L35>
*
* Coq transcription and correctness proof of the loop
* (where scalarbits=255):
* <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L118>
* <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L278>
* preconditions: 0 <= e < 2^255 (not necessarily e < order),
* fe_invert(0) = 0
*/
fe_frombytes(&x1, point);
fe_1(&x2);
fe_0(&z2);
fe_copy(&x3, &x1);
fe_1(&z3);
for (pos = 254; pos >= 0; --pos) {
fe tmp0, tmp1;
fe_loose tmp0l, tmp1l;
/* loop invariant as of right before the test, for the case
* where x1 != 0:
* pos >= -1; if z2 = 0 then x2 is nonzero; if z3 = 0 then x3
* is nonzero
* let r := e >> (pos+1) in the following equalities of
* projective points:
* to_xz (r*P) === if swap then (x3, z3) else (x2, z2)
* to_xz ((r+1)*P) === if swap then (x2, z2) else (x3, z3)
* x1 is the nonzero x coordinate of the nonzero
* point (r*P-(r+1)*P)
*/
unsigned b = 1 & (e[pos / 8] >> (pos & 7));
swap ^= b;
fe_cswap(&x2, &x3, swap);
fe_cswap(&z2, &z3, swap);
swap = b;
/* Coq transcription of ladderstep formula (called from
* transcribed loop):
* <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L89>
* <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L131>
* x1 != 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L217>
* x1 = 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L147>
*/
fe_sub(&tmp0l, &x3, &z3);
fe_sub(&tmp1l, &x2, &z2);
fe_add(&x2l, &x2, &z2);
fe_add(&z2l, &x3, &z3);
fe_mul_tll(&z3, &tmp0l, &x2l);
fe_mul_tll(&z2, &z2l, &tmp1l);
fe_sq_tl(&tmp0, &tmp1l);
fe_sq_tl(&tmp1, &x2l);
fe_add(&x3l, &z3, &z2);
fe_sub(&z2l, &z3, &z2);
fe_mul_ttt(&x2, &tmp1, &tmp0);
fe_sub(&tmp1l, &tmp1, &tmp0);
fe_sq_tl(&z2, &z2l);
fe_mul121666(&z3, &tmp1l);
fe_sq_tl(&x3, &x3l);
fe_add(&tmp0l, &tmp0, &z3);
fe_mul_ttt(&z3, &x1, &z2);
fe_mul_tll(&z2, &tmp1l, &tmp0l);
}
/* here pos=-1, so r=e, so to_xz (e*P) === if swap then (x3, z3)
* else (x2, z2)
*/
fe_cswap(&x2, &x3, swap);
fe_cswap(&z2, &z3, swap);
fe_invert(&z2, &z2);
fe_mul_ttt(&x2, &x2, &z2);
fe_tobytes(out, &x2);
memzero_explicit(&x1, sizeof(x1));
memzero_explicit(&x2, sizeof(x2));
memzero_explicit(&z2, sizeof(z2));
memzero_explicit(&x3, sizeof(x3));
memzero_explicit(&z3, sizeof(z3));
memzero_explicit(&x2l, sizeof(x2l));
memzero_explicit(&z2l, sizeof(z2l));
memzero_explicit(&x3l, sizeof(x3l));
memzero_explicit(&e, sizeof(e));
}
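/*
 * For orientation only: a minimal sketch of how a caller could drive
 * curve25519_generic() to compute an X25519 shared secret. The clamping
 * constants come from RFC 7748; the wrapper name is illustrative and is
 * not part of this commit.
 */
static void
x25519_shared_sketch(u8 out[CURVE25519_KEY_SIZE],
const u8 secret[CURVE25519_KEY_SIZE],
const u8 peer_public[CURVE25519_KEY_SIZE])
{
u8 e[CURVE25519_KEY_SIZE];
memcpy(e, secret, CURVE25519_KEY_SIZE);
e[0] &= 248; /* clear the low 3 bits of the scalar */
e[31] &= 127; /* clear the top bit */
e[31] |= 64; /* set the second-highest bit */
curve25519_generic(out, e, peer_public);
memzero_explicit(e, sizeof(e));
}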

File diff suppressed because it is too large

View File

@ -0,0 +1,847 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2019-2020 Rubicon Communications, LLC (Netgate)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/priv.h>
#include <sys/mutex.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/queue.h>
#include <sys/smp.h>
#include <net/if.h>
#include <net/ethernet.h>
#include <net/if_var.h>
#include <net/iflib.h>
#include <net/if_clone.h>
#include <net/radix.h>
#include <net/bpf.h>
#include <net/mp_ring.h>
#include "ifdi_if.h"
#include <sys/wg_module.h>
#include <crypto/zinc.h>
#include <sys/wg_noise.h>
#include <sys/if_wg_session_vars.h>
#include <sys/if_wg_session.h>
MALLOC_DEFINE(M_WG, "WG", "wireguard");
#define WG_CAPS IFCAP_LINKSTATE
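/* Stash the address family in an otherwise unused mbuf pkthdr byte. */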
#define ph_family PH_loc.eight[5]
TASKQGROUP_DECLARE(if_io_tqg);
static int clone_count;
uma_zone_t ratelimit_zone;
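/*
 * Crypto work is fanned out across CPUs: each direction owns one
 * grouptask per CPU, and dispatch simply kicks any task not already
 * enqueued, leaving the scheduling to iflib's if_io taskqgroup.
 */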
void
wg_encrypt_dispatch(struct wg_softc *sc)
{
for (int i = 0; i < mp_ncpus; i++) {
if (sc->sc_encrypt[i].gt_task.ta_flags & TASK_ENQUEUED)
continue;
GROUPTASK_ENQUEUE(&sc->sc_encrypt[i]);
}
}
void
wg_decrypt_dispatch(struct wg_softc *sc)
{
for (int i = 0; i < mp_ncpus; i++) {
if (sc->sc_decrypt[i].gt_task.ta_flags & TASK_ENQUEUED)
continue;
GROUPTASK_ENQUEUE(&sc->sc_decrypt[i]);
}
}
static void
crypto_taskq_setup(struct wg_softc *sc)
{
device_t dev = iflib_get_dev(sc->wg_ctx);
sc->sc_encrypt = malloc(sizeof(struct grouptask)*mp_ncpus, M_WG, M_WAITOK);
sc->sc_decrypt = malloc(sizeof(struct grouptask)*mp_ncpus, M_WG, M_WAITOK);
for (int i = 0; i < mp_ncpus; i++) {
GROUPTASK_INIT(&sc->sc_encrypt[i], 0,
(gtask_fn_t *)wg_softc_encrypt, sc);
taskqgroup_attach_cpu(qgroup_if_io_tqg, &sc->sc_encrypt[i], sc, i, dev, NULL, "wg encrypt");
GROUPTASK_INIT(&sc->sc_decrypt[i], 0,
(gtask_fn_t *)wg_softc_decrypt, sc);
taskqgroup_attach_cpu(qgroup_if_io_tqg, &sc->sc_decrypt[i], sc, i, dev, NULL, "wg decrypt");
}
}
static void
crypto_taskq_destroy(struct wg_softc *sc)
{
for (int i = 0; i < mp_ncpus; i++) {
taskqgroup_detach(qgroup_if_io_tqg, &sc->sc_encrypt[i]);
taskqgroup_detach(qgroup_if_io_tqg, &sc->sc_decrypt[i]);
}
free(sc->sc_encrypt, M_WG);
free(sc->sc_decrypt, M_WG);
}
static int
wg_cloneattach(if_ctx_t ctx, struct if_clone *ifc, const char *name, caddr_t params)
{
struct wg_softc *sc = iflib_get_softc(ctx);
if_softc_ctx_t scctx;
device_t dev;
struct iovec iov;
nvlist_t *nvl;
void *packed;
struct noise_local *local;
uint8_t public[WG_KEY_SIZE];
struct noise_upcall noise_upcall;
int err;
uint16_t listen_port;
const void *key;
size_t size;
err = 0;
dev = iflib_get_dev(ctx);
if (params == NULL) {
key = NULL;
listen_port = 0;
nvl = NULL;
packed = NULL;
goto unpacked;
}
if (copyin(params, &iov, sizeof(iov)))
return (EFAULT);
/* check that this is reasonable */
size = iov.iov_len;
packed = malloc(size, M_TEMP, M_WAITOK);
if (copyin(iov.iov_base, packed, size)) {
err = EFAULT;
goto out;
}
nvl = nvlist_unpack(packed, size, 0);
if (nvl == NULL) {
device_printf(dev, "%s nvlist_unpack failed\n", __func__);
err = EBADMSG;
goto out;
}
if (!nvlist_exists_number(nvl, "listen-port")) {
device_printf(dev, "%s listen-port not set\n", __func__);
err = EBADMSG;
goto nvl_out;
}
listen_port = nvlist_get_number(nvl, "listen-port");
if (!nvlist_exists_binary(nvl, "private-key")) {
device_printf(dev, "%s private-key not set\n", __func__);
err = EBADMSG;
goto nvl_out;
}
key = nvlist_get_binary(nvl, "private-key", &size);
if (size != CURVE25519_KEY_SIZE) {
device_printf(dev, "%s bad length for private-key %zu\n", __func__, size);
err = EBADMSG;
goto nvl_out;
}
unpacked:
local = &sc->sc_local;
noise_upcall.u_arg = sc;
noise_upcall.u_remote_get =
(struct noise_remote *(*)(void *, uint8_t *))wg_remote_get;
noise_upcall.u_index_set =
(uint32_t (*)(void *, struct noise_remote *))wg_index_set;
noise_upcall.u_index_drop =
(void (*)(void *, uint32_t))wg_index_drop;
noise_local_init(local, &noise_upcall);
cookie_checker_init(&sc->sc_cookie, ratelimit_zone);
sc->sc_socket.so_port = listen_port;
if (key != NULL) {
noise_local_set_private(local, __DECONST(uint8_t *, key));
noise_local_keys(local, public, NULL);
cookie_checker_update(&sc->sc_cookie, public);
}
atomic_add_int(&clone_count, 1);
scctx = sc->shared = iflib_get_softc_ctx(ctx);
scctx->isc_capenable = WG_CAPS;
scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_TSO |
CSUM_IP6_TCP | CSUM_IP6_UDP;
sc->wg_ctx = ctx;
sc->sc_ifp = iflib_get_ifp(ctx);
mbufq_init(&sc->sc_handshake_queue, MAX_QUEUED_INCOMING_HANDSHAKES);
mtx_init(&sc->sc_mtx, NULL, "wg softc lock", MTX_DEF);
rw_init(&sc->sc_index_lock, "wg index lock");
sc->sc_encap_ring = buf_ring_alloc(MAX_QUEUED_PACKETS, M_WG, M_WAITOK, &sc->sc_mtx);
sc->sc_decap_ring = buf_ring_alloc(MAX_QUEUED_PACKETS, M_WG, M_WAITOK, &sc->sc_mtx);
GROUPTASK_INIT(&sc->sc_handshake, 0,
(gtask_fn_t *)wg_softc_handshake_receive, sc);
taskqgroup_attach(qgroup_if_io_tqg, &sc->sc_handshake, sc, dev, NULL, "wg tx initiation");
crypto_taskq_setup(sc);
nvl_out:
if (nvl != NULL)
nvlist_destroy(nvl);
out:
free(packed, M_TEMP);
return (err);
}
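/*
 * For orientation only: a userland sketch of packing the clone
 * parameters consumed by wg_cloneattach() above, using libnv
 * (<sys/nv.h>). The helper name is illustrative; error handling is
 * elided.
 */
static void *
wg_pack_params_sketch(uint16_t port, const uint8_t key[32], size_t *sizep)
{
nvlist_t *nvl = nvlist_create(0);
void *packed;
nvlist_add_number(nvl, "listen-port", port);
nvlist_add_binary(nvl, "private-key", key, 32);
packed = nvlist_pack(nvl, sizep);
nvlist_destroy(nvl);
return (packed);
}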
static int
wg_transmit(struct ifnet *ifp, struct mbuf *m)
{
struct wg_softc *sc;
sa_family_t family;
struct epoch_tracker et;
struct wg_peer *peer;
struct wg_tag *t;
uint32_t af;
int rc;
/*
* Work around lifetime issue in the ipv6 mld code.
*/
if (__predict_false(ifp->if_flags & IFF_DYING))
return (ENXIO);
rc = 0;
sc = iflib_get_softc(ifp->if_softc);
if ((t = wg_tag_get(m)) == NULL) {
rc = ENOBUFS;
goto early_out;
}
af = m->m_pkthdr.ph_family;
BPF_MTAP2(ifp, &af, sizeof(af), m);
NET_EPOCH_ENTER(et);
peer = wg_route_lookup(&sc->sc_routes, m, OUT);
if (__predict_false(peer == NULL)) {
rc = ENOKEY;
printf("peer not found - dropping %p\n", m);
/* XXX log */
goto err;
}
family = atomic_load_acq(peer->p_endpoint.e_remote.r_sa.sa_family);
if (__predict_false(family != AF_INET && family != AF_INET6)) {
rc = EHOSTUNREACH;
/* XXX log */
goto err;
}
t->t_peer = peer;
t->t_mbuf = NULL;
t->t_done = 0;
t->t_mtu = ifp->if_mtu;
rc = wg_queue_out(peer, m);
if (rc == 0)
wg_encrypt_dispatch(peer->p_sc);
NET_EPOCH_EXIT(et);
return (rc);
err:
NET_EPOCH_EXIT(et);
early_out:
if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
/* XXX send ICMP unreachable */
m_free(m);
return (rc);
}
static int
wg_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa, struct route *rt)
{
m->m_pkthdr.ph_family = sa->sa_family;
return (wg_transmit(ifp, m));
}
static int
wg_attach_post(if_ctx_t ctx)
{
struct ifnet *ifp;
struct wg_softc *sc;
sc = iflib_get_softc(ctx);
ifp = iflib_get_ifp(ctx);
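/* 80 bytes covers worst-case encapsulation: IPv6 (40) + UDP (8) +
 * WireGuard data header (16) + Poly1305 tag (16). */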
if_setmtu(ifp, ETHERMTU - 80);
if_setflagbits(ifp, IFF_NOARP, IFF_POINTOPOINT);
ifp->if_transmit = wg_transmit;
ifp->if_output = wg_output;
wg_hashtable_init(&sc->sc_hashtable);
sc->sc_index = hashinit(HASHTABLE_INDEX_SIZE, M_DEVBUF, &sc->sc_index_mask);
wg_route_init(&sc->sc_routes);
return (0);
}
static int
wg_mtu_set(if_ctx_t ctx, uint32_t mtu)
{
return (0);
}
static int
wg_set_promisc(if_ctx_t ctx, int flags)
{
return (0);
}
static int
wg_detach(if_ctx_t ctx)
{
struct wg_softc *sc;
sc = iflib_get_softc(ctx);
if_link_state_change(sc->sc_ifp, LINK_STATE_DOWN);
NET_EPOCH_WAIT();
wg_socket_reinit(sc, NULL, NULL);
taskqgroup_drain_all(qgroup_if_io_tqg);
pause("link_down", hz/4);
wg_peer_remove_all(sc);
pause("link_down", hz);
mtx_destroy(&sc->sc_mtx);
rw_destroy(&sc->sc_index_lock);
taskqgroup_detach(qgroup_if_io_tqg, &sc->sc_handshake);
crypto_taskq_destroy(sc);
buf_ring_free(sc->sc_encap_ring, M_WG);
buf_ring_free(sc->sc_decap_ring, M_WG);
wg_route_destroy(&sc->sc_routes);
wg_hashtable_destroy(&sc->sc_hashtable);
atomic_add_int(&clone_count, -1);
return (0);
}
static void
wg_init(if_ctx_t ctx)
{
struct ifnet *ifp;
struct wg_softc *sc;
int rc;
sc = iflib_get_softc(ctx);
ifp = iflib_get_ifp(ctx);
rc = wg_socket_init(sc);
if (rc)
return;
if_link_state_change(ifp, LINK_STATE_UP);
}
static void
wg_stop(if_ctx_t ctx)
{
struct wg_softc *sc;
struct ifnet *ifp;
sc = iflib_get_softc(ctx);
ifp = iflib_get_ifp(ctx);
if_link_state_change(ifp, LINK_STATE_DOWN);
}
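/*
 * Marshal one peer into an nvlist carrying its public key, current
 * endpoint, and a flat array of allowed-ip entries gathered from its
 * route list; this is the layout wgc_get() hands back to userland.
 */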
static nvlist_t *
wg_peer_to_nvl(struct wg_peer *peer)
{
struct wg_route *rt;
int i, count;
nvlist_t *nvl;
caddr_t key;
struct wg_allowedip *aip;
if ((nvl = nvlist_create(0)) == NULL)
return (NULL);
key = peer->p_remote.r_public;
nvlist_add_binary(nvl, "public-key", key, WG_KEY_SIZE);
nvlist_add_binary(nvl, "endpoint", &peer->p_endpoint.e_remote, sizeof(struct sockaddr));
i = count = 0;
CK_LIST_FOREACH(rt, &peer->p_routes, r_entry) {
count++;
}
aip = malloc(count*sizeof(*aip), M_TEMP, M_WAITOK);
CK_LIST_FOREACH(rt, &peer->p_routes, r_entry) {
memcpy(&aip[i++], &rt->r_cidr, sizeof(*aip));
}
nvlist_add_binary(nvl, "allowed-ips", aip, count*sizeof(*aip));
free(aip, M_TEMP);
return (nvl);
}
static int
wg_marshal_peers(struct wg_softc *sc, nvlist_t **nvlp, nvlist_t ***nvl_arrayp, int *peer_countp)
{
struct wg_peer *peer;
int err, i, peer_count;
nvlist_t *nvl, **nvl_array;
struct epoch_tracker et;
#ifdef INVARIANTS
void *packed;
size_t size;
#endif
nvl = NULL;
nvl_array = NULL;
if (nvl_arrayp)
*nvl_arrayp = NULL;
if (nvlp)
*nvlp = NULL;
if (peer_countp)
*peer_countp = 0;
peer_count = sc->sc_hashtable.h_num_peers;
if (peer_count == 0) {
printf("no peers found\n");
return (ENOENT);
}
if (nvlp && (nvl = nvlist_create(0)) == NULL)
return (ENOMEM);
err = i = 0;
nvl_array = malloc(peer_count*sizeof(void*), M_TEMP, M_WAITOK);
NET_EPOCH_ENTER(et);
CK_LIST_FOREACH(peer, &sc->sc_hashtable.h_peers_list, p_entry) {
nvl_array[i] = wg_peer_to_nvl(peer);
if (nvl_array[i] == NULL) {
printf("wg_peer_to_nvl failed on %d peer\n", i);
break;
}
#ifdef INVARIANTS
packed = nvlist_pack(nvl_array[i], &size);
if (packed == NULL) {
printf("nvlist_pack(%p, %p) => %d",
nvl_array[i], &size, nvlist_error(nvl));
}
free(packed, M_NVLIST);
#endif
i++;
if (i == peer_count)
break;
}
NET_EPOCH_EXIT(et);
*peer_countp = peer_count = i;
if (peer_count == 0) {
printf("no peers found in list\n");
err = ENOENT;
goto out;
}
if (nvl) {
nvlist_add_nvlist_array(nvl, "peer-list",
(const nvlist_t * const *)nvl_array, peer_count);
if ((err = nvlist_error(nvl))) {
printf("nvlist_add_nvlist_array(%p, \"peer-list\", %p, %d) => %d\n",
nvl, nvl_array, peer_count, err);
goto out;
}
*nvlp = nvl;
}
*nvl_arrayp = nvl_array;
return (0);
out:
while (i-- > 0)
nvlist_destroy(nvl_array[i]);
free(nvl_array, M_TEMP);
if (nvl != NULL)
nvlist_destroy(nvl);
return (err);
}
static int
wgc_get(struct wg_softc *sc, struct ifdrv *ifd)
{
nvlist_t *nvl, **nvl_array;
void *packed;
size_t size;
int peer_count, err;
nvl = nvlist_create(0);
if (nvl == NULL)
return (ENOMEM);
err = 0;
packed = NULL;
if (sc->sc_socket.so_port != 0)
nvlist_add_number(nvl, "listen-port", sc->sc_socket.so_port);
if (sc->sc_local.l_has_identity) {
nvlist_add_binary(nvl, "public-key", sc->sc_local.l_public, WG_KEY_SIZE);
if (curthread->td_ucred->cr_uid == 0)
nvlist_add_binary(nvl, "private-key", sc->sc_local.l_private, WG_KEY_SIZE);
}
if (sc->sc_hashtable.h_num_peers > 0) {
err = wg_marshal_peers(sc, NULL, &nvl_array, &peer_count);
if (err)
goto out;
nvlist_add_nvlist_array(nvl, "peer-list",
(const nvlist_t * const *)nvl_array, peer_count);
}
packed = nvlist_pack(nvl, &size);
if (packed == NULL) {
err = ENOMEM;
goto out;
}
if (ifd->ifd_len == 0) {
ifd->ifd_len = size;
goto out;
}
if (ifd->ifd_len < size) {
err = ENOSPC;
goto out;
}
if (ifd->ifd_data == NULL) {
err = EFAULT;
goto out;
}
err = copyout(packed, ifd->ifd_data, size);
ifd->ifd_len = size;
out:
nvlist_destroy(nvl);
free(packed, M_NVLIST);
return (err);
}
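/*
 * For orientation only: the ifd_len == 0 probe above implies the usual
 * two-pass SIOCGDRVSPEC pattern from userland (<sys/ioctl.h>,
 * <sys/sockio.h>, <net/if.h>); a sketch, with error handling elided and
 * WGC_GET assumed visible to the caller.
 */
static void *
wg_get_sketch(int s, const char *ifname, size_t *sizep)
{
struct ifdrv ifd;
memset(&ifd, 0, sizeof(ifd));
strlcpy(ifd.ifd_name, ifname, sizeof(ifd.ifd_name));
ifd.ifd_cmd = WGC_GET;
ifd.ifd_len = 0; /* first pass: learn the required size */
if (ioctl(s, SIOCGDRVSPEC, &ifd) < 0)
return (NULL);
ifd.ifd_data = malloc(ifd.ifd_len); /* second pass: fetch the nvlist */
if (ioctl(s, SIOCGDRVSPEC, &ifd) < 0)
return (NULL);
*sizep = ifd.ifd_len;
return (ifd.ifd_data);
}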
static bool
wg_allowedip_valid(const struct wg_allowedip *wip)
{
return (true);
}
static int
wg_peer_add(struct wg_softc *sc, const nvlist_t *nvl)
{
uint8_t public[WG_KEY_SIZE];
const void *pub_key;
const struct sockaddr *endpoint;
int i, err, allowedip_count;
device_t dev;
size_t size;
struct wg_peer *peer = NULL;
bool need_insert = false;
dev = iflib_get_dev(sc->wg_ctx);
if (!nvlist_exists_binary(nvl, "public-key")) {
device_printf(dev, "peer has no public-key\n");
return (EINVAL);
}
pub_key = nvlist_get_binary(nvl, "public-key", &size);
if (size != CURVE25519_KEY_SIZE) {
device_printf(dev, "%s bad length for public-key %zu\n", __func__, size);
return (EINVAL);
}
if (noise_local_keys(&sc->sc_local, public, NULL) == 0 &&
bcmp(public, pub_key, WG_KEY_SIZE) == 0) {
device_printf(dev, "public-key for peer already in use by host\n");
return (EINVAL);
}
peer = wg_peer_lookup(sc, pub_key);
if (nvlist_exists_bool(nvl, "peer-remove") &&
nvlist_get_bool(nvl, "peer-remove")) {
if (peer != NULL) {
wg_hashtable_peer_remove(&sc->sc_hashtable, peer);
wg_peer_destroy(peer);
/* XXX free */
printf("peer removed\n");
}
return (0);
}
if (nvlist_exists_bool(nvl, "replace-allowedips") &&
nvlist_get_bool(nvl, "replace-allowedips") &&
peer != NULL) {
wg_route_delete(&peer->p_sc->sc_routes, peer);
}
if (peer == NULL) {
need_insert = true;
peer = wg_peer_alloc(sc);
noise_remote_init(&peer->p_remote, pub_key, &sc->sc_local);
cookie_maker_init(&peer->p_cookie, pub_key);
}
if (nvlist_exists_binary(nvl, "endpoint")) {
endpoint = nvlist_get_binary(nvl, "endpoint", &size);
if (size != sizeof(*endpoint)) {
device_printf(dev, "%s bad length for endpoint %zu\n", __func__, size);
err = EBADMSG;
goto out;
}
memcpy(&peer->p_endpoint.e_remote, endpoint,
sizeof(peer->p_endpoint.e_remote));
}
if (nvlist_exists_binary(nvl, "pre-shared-key")) {
const void *key;
key = nvlist_get_binary(nvl, "pre-shared-key", &size);
noise_remote_set_psk(&peer->p_remote, key);
}
if (nvlist_exists_number(nvl, "persistent-keepalive-interval")) {
uint16_t pki;
pki = nvlist_get_number(nvl, "persistent-keepalive-interval");
wg_timers_set_persistent_keepalive(&peer->p_timers, pki);
}
if (nvlist_exists_binary(nvl, "allowed-ips")) {
const struct wg_allowedip *aip, *aip_base;
aip = aip_base = nvlist_get_binary(nvl, "allowed-ips", &size);
if (size % sizeof(struct wg_allowedip) != 0) {
device_printf(dev, "%s bad length for allowed-ips %zu not integer multiple of struct size\n", __func__, size);
err = EBADMSG;
goto out;
}
allowedip_count = size/sizeof(struct wg_allowedip);
for (i = 0; i < allowedip_count; i++) {
if (!wg_allowedip_valid(&aip_base[i])) {
device_printf(dev, "%s allowedip %d not valid\n", __func__, i);
err = EBADMSG;
goto out;
}
}
for (int i = 0; i < allowedip_count; i++, aip++) {
if ((err = wg_route_add(&sc->sc_routes, peer, aip)) != 0) {
printf("route add %d failed -> %d\n", i, err);
}
}
}
if (need_insert)
wg_hashtable_peer_insert(&sc->sc_hashtable, peer);
return (0);
out:
wg_peer_destroy(peer);
return (err);
}
static int
wgc_set(struct wg_softc *sc, struct ifdrv *ifd)
{
uint8_t public[WG_KEY_SIZE];
void *nvlpacked;
nvlist_t *nvl;
device_t dev;
size_t size;
int err;
if (ifd->ifd_len == 0 || ifd->ifd_data == NULL)
return (EFAULT);
dev = iflib_get_dev(sc->wg_ctx);
nvlpacked = malloc(ifd->ifd_len, M_TEMP, M_WAITOK);
err = copyin(ifd->ifd_data, nvlpacked, ifd->ifd_len);
if (err)
goto out;
nvl = nvlist_unpack(nvlpacked, ifd->ifd_len, 0);
if (nvl == NULL) {
device_printf(dev, "%s nvlist_unpack failed\n", __func__);
err = EBADMSG;
goto out;
}
if (nvlist_exists_bool(nvl, "replace-peers") &&
nvlist_get_bool(nvl, "replace-peers"))
wg_peer_remove_all(sc);
if (nvlist_exists_number(nvl, "listen-port")) {
int listen_port = nvlist_get_number(nvl, "listen-port");
/*
* Set listen port
*/
if_link_state_change(sc->sc_ifp, LINK_STATE_DOWN);
pause("link_down", hz/4);
wg_socket_reinit(sc, NULL, NULL);
sc->sc_socket.so_port = listen_port;
if ((err = wg_socket_init(sc)) != 0)
goto out;
if_link_state_change(sc->sc_ifp, LINK_STATE_UP);
}
if (nvlist_exists_binary(nvl, "private-key")) {
struct noise_local *local;
const void *key = nvlist_get_binary(nvl, "private-key", &size);
if (size != CURVE25519_KEY_SIZE) {
device_printf(dev, "%s bad length for private-key %zu\n", __func__, size);
err = EBADMSG;
goto nvl_out;
}
/*
* set private key
*/
local = &sc->sc_local;
noise_local_set_private(local, __DECONST(uint8_t *, key));
noise_local_keys(local, public, NULL);
cookie_checker_update(&sc->sc_cookie, public);
}
if (nvlist_exists_number(nvl, "user-cookie")) {
sc->sc_user_cookie = nvlist_get_number(nvl, "user-cookie");
/*
* setsockopt
*/
}
if (nvlist_exists_nvlist_array(nvl, "peer-list")) {
size_t peercount;
const nvlist_t * const*nvl_peers;
nvl_peers = nvlist_get_nvlist_array(nvl, "peer-list", &peercount);
for (int i = 0; i < peercount; i++) {
wg_peer_add(sc, nvl_peers[i]);
}
}
nvl_out:
nvlist_destroy(nvl);
out:
free(nvlpacked, M_TEMP);
return (err);
}
static int
wg_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data)
{
struct wg_softc *sc = iflib_get_softc(ctx);
struct ifdrv *ifd = (struct ifdrv *)data;
int ifd_cmd;
switch (command) {
case SIOCGDRVSPEC:
case SIOCSDRVSPEC:
ifd_cmd = ifd->ifd_cmd;
break;
default:
return (EINVAL);
}
switch (ifd_cmd) {
case WGC_GET:
return (wgc_get(sc, ifd));
case WGC_SET:
if (priv_check(curthread, PRIV_NET_HWIOCTL))
return (EPERM);
return (wgc_set(sc, ifd));
}
return (ENOTSUP);
}
static device_method_t wg_if_methods[] = {
DEVMETHOD(ifdi_cloneattach, wg_cloneattach),
DEVMETHOD(ifdi_attach_post, wg_attach_post),
DEVMETHOD(ifdi_detach, wg_detach),
DEVMETHOD(ifdi_init, wg_init),
DEVMETHOD(ifdi_stop, wg_stop),
DEVMETHOD(ifdi_priv_ioctl, wg_priv_ioctl),
DEVMETHOD(ifdi_mtu_set, wg_mtu_set),
DEVMETHOD(ifdi_promisc_set, wg_set_promisc),
DEVMETHOD_END
};
static driver_t wg_iflib_driver = {
"wg", wg_if_methods, sizeof(struct wg_softc)
};
char wg_driver_version[] = "0.0.1";
static struct if_shared_ctx wg_sctx_init = {
.isc_magic = IFLIB_MAGIC,
.isc_driver_version = wg_driver_version,
.isc_driver = &wg_iflib_driver,
.isc_flags = IFLIB_PSEUDO,
.isc_name = "wg",
};
if_shared_ctx_t wg_sctx = &wg_sctx_init;
static if_pseudo_t wg_pseudo;
int
wg_ctx_init(void)
{
ratelimit_zone = uma_zcreate("wg ratelimit", sizeof(struct ratelimit),
NULL, NULL, NULL, NULL, 0, 0);
return (0);
}
void
wg_ctx_uninit(void)
{
uma_zdestroy(ratelimit_zone);
}
static int
wg_module_init(void)
{
int rc;
if ((rc = wg_ctx_init()))
return (rc);
wg_pseudo = iflib_clone_register(wg_sctx);
if (wg_pseudo == NULL)
return (ENXIO);
return (0);
}
static void
wg_module_deinit(void)
{
wg_ctx_uninit();
iflib_clone_deregister(wg_pseudo);
}
static int
wg_module_event_handler(module_t mod, int what, void *arg)
{
int err;
switch (what) {
case MOD_LOAD:
if ((err = wg_module_init()) != 0)
return (err);
break;
case MOD_UNLOAD:
if (clone_count == 0)
wg_module_deinit();
else
return (EBUSY);
break;
default:
return (EOPNOTSUPP);
}
return (0);
}
static moduledata_t wg_moduledata = {
"wg",
wg_module_event_handler,
NULL
};
DECLARE_MODULE(wg, wg_moduledata, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(wg, 1);
MODULE_DEPEND(wg, iflib, 1, 1, 1);
MODULE_DEPEND(wg, blake2, 1, 1, 1);
MODULE_DEPEND(wg, crypto, 1, 1, 1);

File diff suppressed because it is too large

View File

@ -0,0 +1,399 @@
/*
* Copyright (C) 2015-2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
* Copyright (C) 2019-2020 Matt Dunwoodie <ncon@noconroy.net>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/rwlock.h>
#include <sys/malloc.h> /* Because systm doesn't include M_NOWAIT, M_DEVBUF */
#include <sys/socket.h>
#include <sys/wg_cookie.h>
#include <zinc/chacha20poly1305.h>
static void cookie_precompute_key(uint8_t *,
const uint8_t[COOKIE_INPUT_SIZE], const char *);
static void cookie_macs_mac1(struct cookie_macs *, const void *, size_t,
const uint8_t[COOKIE_KEY_SIZE]);
static void cookie_macs_mac2(struct cookie_macs *, const void *, size_t,
const uint8_t[COOKIE_COOKIE_SIZE]);
static int cookie_timer_expired(struct timespec *, time_t, long);
static void cookie_checker_make_cookie(struct cookie_checker *,
uint8_t[COOKIE_COOKIE_SIZE], struct sockaddr *);
static void ratelimit_gc(struct ratelimit *, int);
static int ratelimit_allow(struct ratelimit *, struct sockaddr *);
/* Public Functions */
void
cookie_maker_init(struct cookie_maker *cp, const uint8_t key[COOKIE_INPUT_SIZE])
{
bzero(cp, sizeof(*cp));
cookie_precompute_key(cp->cp_mac1_key, key, COOKIE_MAC1_KEY_LABEL);
cookie_precompute_key(cp->cp_cookie_key, key, COOKIE_COOKIE_KEY_LABEL);
rw_init(&cp->cp_lock, "cookie_maker");
}
int
cookie_checker_init(struct cookie_checker *cc, uma_zone_t zone)
{
struct ratelimit *rl = &cc->cc_ratelimit;
bzero(cc, sizeof(*cc));
rw_init(&cc->cc_key_lock, "cookie_checker_key");
rw_init(&cc->cc_secret_lock, "cookie_checker_secret");
rw_init(&rl->rl_lock, "ratelimit_lock");
arc4random_buf(&rl->rl_secret, sizeof(rl->rl_secret));
rl->rl_table = hashinit(RATELIMIT_SIZE, M_DEVBUF, &rl->rl_table_mask);
rl->rl_zone = zone;
return (0);
}
void
cookie_checker_update(struct cookie_checker *cc,
uint8_t key[COOKIE_INPUT_SIZE])
{
rw_enter_write(&cc->cc_key_lock);
if (key) {
cookie_precompute_key(cc->cc_mac1_key, key, COOKIE_MAC1_KEY_LABEL);
cookie_precompute_key(cc->cc_cookie_key, key, COOKIE_COOKIE_KEY_LABEL);
} else {
bzero(cc->cc_mac1_key, sizeof(cc->cc_mac1_key));
bzero(cc->cc_cookie_key, sizeof(cc->cc_cookie_key));
}
rw_exit_write(&cc->cc_key_lock);
}
void
cookie_checker_deinit(struct cookie_checker *cc)
{
struct ratelimit *rl = &cc->cc_ratelimit;
rw_enter_write(&rl->rl_lock);
ratelimit_gc(rl, 1);
hashdestroy(rl->rl_table, M_DEVBUF, rl->rl_table_mask);
rw_exit_write(&rl->rl_lock);
}
void
cookie_checker_create_payload(struct cookie_checker *cc,
struct cookie_macs *cm, uint8_t nonce[COOKIE_XNONCE_SIZE],
uint8_t ecookie[COOKIE_ENCRYPTED_SIZE], struct sockaddr *sa)
{
uint8_t cookie[COOKIE_COOKIE_SIZE];
cookie_checker_make_cookie(cc, cookie, sa);
arc4random_buf(nonce, COOKIE_XNONCE_SIZE);
rw_enter_read(&cc->cc_key_lock);
xchacha20poly1305_encrypt(ecookie, cookie, COOKIE_COOKIE_SIZE,
cm->mac1, COOKIE_MAC_SIZE, nonce, cc->cc_cookie_key);
rw_exit_read(&cc->cc_key_lock);
explicit_bzero(cookie, sizeof(cookie));
}
int
cookie_maker_consume_payload(struct cookie_maker *cp,
uint8_t nonce[COOKIE_XNONCE_SIZE], uint8_t ecookie[COOKIE_ENCRYPTED_SIZE])
{
int ret = 0;
uint8_t cookie[COOKIE_COOKIE_SIZE];
rw_enter_write(&cp->cp_lock);
if (cp->cp_mac1_valid == 0) {
ret = ETIMEDOUT;
goto error;
}
if (xchacha20poly1305_decrypt(cookie, ecookie, COOKIE_ENCRYPTED_SIZE,
cp->cp_mac1_last, COOKIE_MAC_SIZE, nonce, cp->cp_cookie_key) == 0) {
ret = EINVAL;
goto error;
}
memcpy(cp->cp_cookie, cookie, COOKIE_COOKIE_SIZE);
getnanouptime(&cp->cp_birthdate);
cp->cp_mac1_valid = 0;
error:
rw_exit_write(&cp->cp_lock);
return ret;
}
void
cookie_maker_mac(struct cookie_maker *cp, struct cookie_macs *cm, void *buf,
size_t len)
{
rw_enter_read(&cp->cp_lock);
cookie_macs_mac1(cm, buf, len, cp->cp_mac1_key);
memcpy(cp->cp_mac1_last, cm->mac1, COOKIE_MAC_SIZE);
cp->cp_mac1_valid = 1;
if (!cookie_timer_expired(&cp->cp_birthdate,
COOKIE_SECRET_MAX_AGE - COOKIE_SECRET_LATENCY, 0))
cookie_macs_mac2(cm, buf, len, cp->cp_cookie);
else
bzero(cm->mac2, COOKIE_MAC_SIZE);
rw_exit_read(&cp->cp_lock);
}
int
cookie_checker_validate_macs(struct cookie_checker *cc, struct cookie_macs *cm,
void *buf, size_t len, int busy, struct sockaddr *sa)
{
struct cookie_macs our_cm;
uint8_t cookie[COOKIE_COOKIE_SIZE];
/* Validate incoming MACs */
rw_enter_read(&cc->cc_key_lock);
cookie_macs_mac1(&our_cm, buf, len, cc->cc_mac1_key);
rw_exit_read(&cc->cc_key_lock);
/* If mac1 is invalid, we want to drop the packet */
if (timingsafe_bcmp(our_cm.mac1, cm->mac1, COOKIE_MAC_SIZE) != 0)
return EINVAL;
if (busy != 0) {
cookie_checker_make_cookie(cc, cookie, sa);
cookie_macs_mac2(&our_cm, buf, len, cookie);
/* If the mac2 is invalid, we want to send a cookie response */
if (timingsafe_bcmp(our_cm.mac2, cm->mac2, COOKIE_MAC_SIZE) != 0)
return EAGAIN;
/* If the mac2 is valid, we may still want to rate limit the
 * peer: ratelimit_allow returns either 0 (no rate limiting) or
 * ECONNREFUSED (rate limit, i.e. refuse the handshake). */
return ratelimit_allow(&cc->cc_ratelimit, sa);
}
return 0;
}
/* Private functions */
static void
cookie_precompute_key(uint8_t *key, const uint8_t input[COOKIE_INPUT_SIZE],
const char *label)
{
struct blake2s_state blake;
blake2s_init(&blake, COOKIE_KEY_SIZE);
blake2s_update(&blake, label, strlen(label));
blake2s_update(&blake, input, COOKIE_INPUT_SIZE);
blake2s_final(&blake, key, COOKIE_KEY_SIZE);
}
static void
cookie_macs_mac1(struct cookie_macs *cm, const void *buf, size_t len,
const uint8_t key[COOKIE_KEY_SIZE])
{
struct blake2s_state state;
blake2s_init_key(&state, COOKIE_MAC_SIZE, key, COOKIE_KEY_SIZE);
blake2s_update(&state, buf, len);
blake2s_final(&state, cm->mac1, COOKIE_MAC_SIZE);
}
static void
cookie_macs_mac2(struct cookie_macs *cm, const void *buf, size_t len,
const uint8_t key[COOKIE_COOKIE_SIZE])
{
struct blake2s_state state;
blake2s_init_key(&state, COOKIE_MAC_SIZE, key, COOKIE_COOKIE_SIZE);
blake2s_update(&state, buf, len);
blake2s_update(&state, cm->mac1, COOKIE_MAC_SIZE);
blake2s_final(&state, cm->mac2, COOKIE_MAC_SIZE);
}
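
Spelled out, the MAC chain that cookie_macs_mac1 and cookie_macs_mac2 compute is roughly the following, where msg is the handshake message up to (but excluding) the MAC fields and secret(t) is the checker's rotating secret (the key labels and sizes come from headers not shown in this hunk):

mac1   = BLAKE2s(key = mac1_key,  msg)
cookie = BLAKE2s(key = secret(t), src_ip || src_port)
mac2   = BLAKE2s(key = cookie,    msg || mac1)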
static int
cookie_timer_expired(struct timespec *birthdate, time_t sec, long nsec)
{
struct timespec uptime;
struct timespec expire = { .tv_sec = sec, .tv_nsec = nsec };
if (birthdate->tv_sec == 0 && birthdate->tv_nsec == 0)
return ETIMEDOUT;
getnanouptime(&uptime);
timespecadd(birthdate, &expire, &expire);
return timespeccmp(&uptime, &expire, >) ? ETIMEDOUT : 0;
}
static void
cookie_checker_make_cookie(struct cookie_checker *cc,
uint8_t cookie[COOKIE_COOKIE_SIZE], struct sockaddr *sa)
{
struct blake2s_state state;
rw_enter_write(&cc->cc_secret_lock);
if (cookie_timer_expired(&cc->cc_secret_birthdate,
COOKIE_SECRET_MAX_AGE, 0)) {
arc4random_buf(cc->cc_secret, COOKIE_SECRET_SIZE);
getnanouptime(&cc->cc_secret_birthdate);
}
blake2s_init_key(&state, COOKIE_COOKIE_SIZE, cc->cc_secret,
COOKIE_SECRET_SIZE);
rw_exit_write(&cc->cc_secret_lock);
if (sa->sa_family == AF_INET) {
blake2s_update(&state, (uint8_t *)&satosin(sa)->sin_addr,
sizeof(struct in_addr));
blake2s_update(&state, (uint8_t *)&satosin(sa)->sin_port,
sizeof(in_port_t));
blake2s_final(&state, cookie, COOKIE_COOKIE_SIZE);
} else if (sa->sa_family == AF_INET6) {
blake2s_update(&state, (uint8_t *)&satosin6(sa)->sin6_addr,
sizeof(struct in6_addr));
blake2s_update(&state, (uint8_t *)&satosin6(sa)->sin6_port,
sizeof(in_port_t));
blake2s_final(&state, cookie, COOKIE_COOKIE_SIZE);
} else {
arc4random_buf(cookie, COOKIE_COOKIE_SIZE);
}
}
static void
ratelimit_gc(struct ratelimit *rl, int force)
{
size_t i;
struct ratelimit_entry *r, *tr;
struct timespec expiry;
rw_assert(&rl->rl_lock, RA_WLOCKED);
if (force) {
for (i = 0; i < RATELIMIT_SIZE; i++) {
LIST_FOREACH_SAFE(r, &rl->rl_table[i], r_entry, tr) {
rl->rl_table_num--;
LIST_REMOVE(r, r_entry);
uma_zfree(rl->rl_zone, r);
}
}
return;
}
if (cookie_timer_expired(&rl->rl_last_gc, ELEMENT_TIMEOUT, 0) &&
    rl->rl_table_num > 0) {
getnanouptime(&rl->rl_last_gc);
getnanouptime(&expiry);
expiry.tv_sec -= ELEMENT_TIMEOUT;
for (i = 0; i < RATELIMIT_SIZE; i++) {
LIST_FOREACH_SAFE(r, &rl->rl_table[i], r_entry, tr) {
if (timespeccmp(&r->r_last_time, &expiry, <)) {
rl->rl_table_num--;
LIST_REMOVE(r, r_entry);
uma_zfree(rl->rl_zone, r);
}
}
}
}
}
static int
ratelimit_allow(struct ratelimit *rl, struct sockaddr *sa)
{
uint64_t key, tokens;
struct timespec diff;
struct ratelimit_entry *r;
int ret = ECONNREFUSED;
if (sa->sa_family == AF_INET)
key = siphash24(&rl->rl_secret, &satosin(sa)->sin_addr,
IPV4_MASK_SIZE);
else if (sa->sa_family == AF_INET6)
key = siphash24(&rl->rl_secret, &satosin6(sa)->sin6_addr,
IPV6_MASK_SIZE);
else
return ret;
rw_enter_write(&rl->rl_lock);
LIST_FOREACH(r, &rl->rl_table[key & rl->rl_table_mask], r_entry) {
if (r->r_af != sa->sa_family)
continue;
if (r->r_af == AF_INET && bcmp(&r->r_in,
&satosin(sa)->sin_addr, IPV4_MASK_SIZE) != 0)
continue;
if (r->r_af == AF_INET6 && bcmp(&r->r_in6,
&satosin6(sa)->sin6_addr, IPV6_MASK_SIZE) != 0)
continue;
/* If we get here, we've found an entry for the endpoint.
 * We apply a standard token bucket: compute the time elapsed
 * since last_time, add that many tokens, and cap the total at
 * TOKEN_MAX. If the endpoint has no tokens left (that is,
 * tokens <= INITIATION_COST) we block the request; otherwise
 * we subtract INITIATION_COST and return OK. */
diff = r->r_last_time;
getnanouptime(&r->r_last_time);
timespecsub(&r->r_last_time, &diff, &diff);
tokens = r->r_tokens + diff.tv_sec * NSEC_PER_SEC + diff.tv_nsec;
if (tokens > TOKEN_MAX)
tokens = TOKEN_MAX;
if (tokens > INITIATION_COST) {
r->r_tokens = tokens - INITIATION_COST;
goto ok;
} else {
r->r_tokens = tokens;
goto error;
}
}
/* If we get to here, we didn't have an entry for the endpoint. */
ratelimit_gc(rl, 0);
/* Hard limit on number of entries */
if (rl->rl_table_num >= RATELIMIT_SIZE_MAX * 8)
goto error;
/* Bail to the error path if out of memory */
if ((r = uma_zalloc(rl->rl_zone, M_NOWAIT)) == NULL)
goto error;
rl->rl_table_num++;
/* Insert entry into the hashtable and ensure it's initialised */
LIST_INSERT_HEAD(&rl->rl_table[key & rl->rl_table_mask], r, r_entry);
r->r_af = sa->sa_family;
if (r->r_af == AF_INET)
memcpy(&r->r_in, &satosin(sa)->sin_addr, IPV4_MASK_SIZE);
else if (r->r_af == AF_INET6)
memcpy(&r->r_in6, &satosin6(sa)->sin6_addr, IPV6_MASK_SIZE);
getnanouptime(&r->r_last_time);
r->r_tokens = TOKEN_MAX - INITIATION_COST;
ok:
ret = 0;
error:
rw_exit_write(&rl->rl_lock);
return ret;
}
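
The bucket counts tokens in nanoseconds: one second of silence refills NSEC_PER_SEC tokens, and each initiation spends INITIATION_COST of them. A standalone userland sketch of the same arithmetic; the policy constants here are invented for the demo and are not the values the module actually uses:

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC	1000000000ULL
/* Illustrative policy only; the real constants live in a header not shown. */
#define INITIATION_COST	(NSEC_PER_SEC / 20)	/* assume 20 initiations/sec */
#define TOKEN_MAX	(INITIATION_COST * 5)	/* assume bursts of ~5 */

int
main(void)
{
	uint64_t tokens = TOKEN_MAX;

	/* A burst of back-to-back initiations, i.e. zero elapsed time. */
	for (int i = 0; i < 6; i++) {
		if (tokens > INITIATION_COST) {
			tokens -= INITIATION_COST;
			printf("initiation %d: allowed, %llu tokens left\n",
			    i, (unsigned long long)tokens);
		} else {
			printf("initiation %d: refused\n", i);
		}
	}

	/* 100ms of silence refills 100ms worth of tokens, capped at TOKEN_MAX. */
	tokens += NSEC_PER_SEC / 10;
	if (tokens > TOKEN_MAX)
		tokens = TOKEN_MAX;
	printf("after 100ms idle: %llu tokens\n", (unsigned long long)tokens);
	return 0;
}

With these numbers only four of the six back-to-back initiations pass, since the check is strictly greater-than; the 100 ms pause then buys back two more.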


@ -0,0 +1,958 @@
/*
* Copyright (C) 2015-2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
* Copyright (C) 2019-2020 Matt Dunwoodie <ncon@noconroy.net>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/rwlock.h>
#include <sys/wg_noise.h>
#include <crypto/blake2s.h>
#include <crypto/curve25519.h>
#include <zinc/chacha20poly1305.h>
/* Private functions */
static struct noise_keypair *
noise_remote_keypair_allocate(struct noise_remote *);
static void
noise_remote_keypair_free(struct noise_remote *,
struct noise_keypair *);
static uint32_t noise_remote_handshake_index_get(struct noise_remote *);
static void noise_remote_handshake_index_drop(struct noise_remote *);
static uint64_t noise_counter_send(struct noise_counter *);
static int noise_counter_recv(struct noise_counter *, uint64_t);
static void noise_kdf(uint8_t *, uint8_t *, uint8_t *, const uint8_t *,
size_t, size_t, size_t, size_t,
const uint8_t [NOISE_HASH_SIZE]);
static int noise_mix_dh(
uint8_t [NOISE_HASH_SIZE],
uint8_t [NOISE_SYMMETRIC_SIZE],
const uint8_t [NOISE_KEY_SIZE],
const uint8_t [NOISE_KEY_SIZE]);
static int noise_mix_ss(
uint8_t ck[NOISE_HASH_SIZE],
uint8_t key[NOISE_SYMMETRIC_SIZE],
const uint8_t ss[NOISE_KEY_SIZE]);
static void noise_mix_hash(
uint8_t [NOISE_HASH_SIZE],
const uint8_t *,
size_t);
static void noise_mix_psk(
uint8_t [NOISE_HASH_SIZE],
uint8_t [NOISE_HASH_SIZE],
uint8_t [NOISE_SYMMETRIC_SIZE],
const uint8_t [NOISE_KEY_SIZE]);
static void noise_param_init(
uint8_t [NOISE_HASH_SIZE],
uint8_t [NOISE_HASH_SIZE],
const uint8_t [NOISE_KEY_SIZE]);
static void noise_msg_encrypt(uint8_t *, const uint8_t *, size_t,
uint8_t [NOISE_SYMMETRIC_SIZE],
uint8_t [NOISE_HASH_SIZE]);
static int noise_msg_decrypt(uint8_t *, const uint8_t *, size_t,
uint8_t [NOISE_SYMMETRIC_SIZE],
uint8_t [NOISE_HASH_SIZE]);
static void noise_msg_ephemeral(
uint8_t [NOISE_HASH_SIZE],
uint8_t [NOISE_HASH_SIZE],
const uint8_t src[NOISE_KEY_SIZE]);
static void noise_tai64n_now(uint8_t [NOISE_TIMESTAMP_SIZE]);
static int noise_timer_expired(struct timespec *, time_t, long);
/* Set/Get noise parameters */
void
noise_local_init(struct noise_local *l, struct noise_upcall *upcall)
{
bzero(l, sizeof(*l));
rw_init(&l->l_identity_lock, "noise_local_identity");
l->l_upcall = *upcall;
}
void
noise_local_lock_identity(struct noise_local *l)
{
rw_enter_write(&l->l_identity_lock);
}
void
noise_local_unlock_identity(struct noise_local *l)
{
rw_exit_write(&l->l_identity_lock);
}
int
noise_local_set_private(struct noise_local *l, uint8_t private[NOISE_KEY_SIZE])
{
memcpy(l->l_private, private, NOISE_KEY_SIZE);
curve25519_clamp_secret(l->l_private);
l->l_has_identity = curve25519_generate_public(l->l_public, private);
return l->l_has_identity ? 0 : ENXIO;
}
int
noise_local_keys(struct noise_local *l, uint8_t public[NOISE_KEY_SIZE],
uint8_t private[NOISE_KEY_SIZE])
{
int ret = 0;
rw_enter_read(&l->l_identity_lock);
if (l->l_has_identity) {
if (public != NULL)
memcpy(public, l->l_public, NOISE_KEY_SIZE);
if (private != NULL)
memcpy(private, l->l_private, NOISE_KEY_SIZE);
} else {
ret = ENXIO;
}
rw_exit_read(&l->l_identity_lock);
return ret;
}
void
noise_remote_init(struct noise_remote *r, const uint8_t public[NOISE_KEY_SIZE],
struct noise_local *l)
{
bzero(r, sizeof(*r));
memcpy(r->r_public, public, NOISE_KEY_SIZE);
rw_init(&r->r_handshake_lock, "noise_handshake");
rw_init(&r->r_keypair_lock, "noise_keypair");
SLIST_INSERT_HEAD(&r->r_unused_keypairs, &r->r_keypair[0], kp_entry);
SLIST_INSERT_HEAD(&r->r_unused_keypairs, &r->r_keypair[1], kp_entry);
SLIST_INSERT_HEAD(&r->r_unused_keypairs, &r->r_keypair[2], kp_entry);
ASSERT(l != NULL);
r->r_local = l;
rw_enter_write(&l->l_identity_lock);
noise_remote_precompute(r);
rw_exit_write(&l->l_identity_lock);
}
int
noise_remote_set_psk(struct noise_remote *r, const uint8_t psk[NOISE_PSK_SIZE])
{
int same;
rw_enter_write(&r->r_handshake_lock);
same = !timingsafe_bcmp(r->r_psk, psk, NOISE_PSK_SIZE);
if (!same) {
memcpy(r->r_psk, psk, NOISE_PSK_SIZE);
}
rw_exit_write(&r->r_handshake_lock);
return same ? EEXIST : 0;
}
int
noise_remote_keys(struct noise_remote *r, uint8_t public[NOISE_KEY_SIZE],
uint8_t psk[NOISE_PSK_SIZE])
{
static uint8_t null_psk[NOISE_PSK_SIZE];
int ret;
if (public != NULL)
memcpy(public, r->r_public, NOISE_KEY_SIZE);
rw_enter_read(&r->r_handshake_lock);
if (psk != NULL)
memcpy(psk, r->r_psk, NOISE_PSK_SIZE);
ret = timingsafe_bcmp(r->r_psk, null_psk, NOISE_PSK_SIZE);
rw_exit_read(&r->r_handshake_lock);
/* If r_psk != null_psk return 0, else ENOENT (no psk) */
return ret ? 0 : ENOENT;
}
void
noise_remote_precompute(struct noise_remote *r)
{
struct noise_local *l = r->r_local;
if (!l->l_has_identity)
bzero(r->r_ss, NOISE_KEY_SIZE);
else if (!curve25519(r->r_ss, l->l_private, r->r_public))
bzero(r->r_ss, NOISE_KEY_SIZE);
rw_enter_write(&r->r_handshake_lock);
noise_remote_handshake_index_drop(r);
explicit_bzero(&r->r_handshake, sizeof(r->r_handshake));
rw_exit_write(&r->r_handshake_lock);
}
/* Handshake functions */
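
The create/consume pairs below implement WireGuard's instantiation of Noise, the IKpsk2 pattern; the short comments in the bodies (/* e */, /* es */, ...) are the standard Noise token names. For orientation:

Noise_IKpsk2:
    <- s                      (responder static key known out of band)
    ...
    -> e, es, s, ss, {t}      (initiation: noise_create_initiation)
    <- e, ee, se, psk, {}     (response:   noise_create_response)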
int
noise_create_initiation(struct noise_remote *r, struct noise_initiation *init)
{
struct noise_handshake *hs = &r->r_handshake;
struct noise_local *l = r->r_local;
uint8_t key[NOISE_SYMMETRIC_SIZE];
int ret = EINVAL;
rw_enter_read(&l->l_identity_lock);
rw_enter_write(&r->r_handshake_lock);
if (!l->l_has_identity)
goto error;
noise_param_init(hs->hs_ck, hs->hs_hash, r->r_public);
/* e */
curve25519_generate_secret(hs->hs_e);
if (curve25519_generate_public(init->ue, hs->hs_e) == 0)
goto error;
noise_msg_ephemeral(hs->hs_ck, hs->hs_hash, init->ue);
/* es */
if (noise_mix_dh(hs->hs_ck, key, hs->hs_e, r->r_public) != 0)
goto error;
/* s */
noise_msg_encrypt(init->es, l->l_public,
NOISE_KEY_SIZE, key, hs->hs_hash);
/* ss */
if (noise_mix_ss(hs->hs_ck, key, r->r_ss) != 0)
goto error;
/* {t} */
noise_tai64n_now(init->ets);
noise_msg_encrypt(init->ets, init->ets,
NOISE_TIMESTAMP_SIZE, key, hs->hs_hash);
noise_remote_handshake_index_drop(r);
hs->hs_state = CREATED_INITIATION;
hs->hs_local_index = noise_remote_handshake_index_get(r);
init->s_idx = hs->hs_local_index;
ret = 0;
error:
rw_exit_write(&r->r_handshake_lock);
rw_exit_read(&l->l_identity_lock);
if (ret != 0)
explicit_bzero(init, sizeof(*init));
explicit_bzero(key, NOISE_SYMMETRIC_SIZE);
return ret;
}
int
noise_consume_initiation(struct noise_local *l, struct noise_remote **rp,
struct noise_initiation *init)
{
struct noise_remote *r;
struct noise_handshake hs;
uint8_t key[NOISE_SYMMETRIC_SIZE];
uint8_t r_public[NOISE_KEY_SIZE];
uint8_t timestamp[NOISE_TIMESTAMP_SIZE];
int ret = EINVAL;
rw_enter_read(&l->l_identity_lock);
if (!l->l_has_identity)
goto error;
noise_param_init(hs.hs_ck, hs.hs_hash, l->l_public);
/* e */
noise_msg_ephemeral(hs.hs_ck, hs.hs_hash, init->ue);
/* es */
if (noise_mix_dh(hs.hs_ck, key, l->l_private, init->ue) != 0)
goto error;
/* s */
if (noise_msg_decrypt(r_public, init->es,
NOISE_KEY_SIZE + NOISE_MAC_SIZE, key, hs.hs_hash) != 0)
goto error;
/* Lookup the remote we received from */
if ((r = l->l_upcall.u_remote_get(l->l_upcall.u_arg, r_public)) == NULL)
goto error;
/* ss */
if (noise_mix_ss(hs.hs_ck, key, r->r_ss) != 0)
goto error;
/* {t} */
if (noise_msg_decrypt(timestamp, init->ets,
NOISE_TIMESTAMP_SIZE + NOISE_MAC_SIZE, key, hs.hs_hash) != 0)
goto error;
hs.hs_state = CONSUMED_INITIATION;
hs.hs_local_index = 0;
hs.hs_remote_index = init->s_idx;
memcpy(hs.hs_e, init->ue, NOISE_KEY_SIZE);
/* We have successfully computed the same results; now we ensure that
 * this is not an initiation replay or a flood attack. */
rw_enter_write(&r->r_handshake_lock);
/* Replay */
if (memcmp(timestamp, r->r_timestamp, NOISE_TIMESTAMP_SIZE) > 0)
memcpy(r->r_timestamp, timestamp, NOISE_TIMESTAMP_SIZE);
else
goto error_set;
/* Flood attack */
if (noise_timer_expired(&r->r_last_init, 0, REJECT_INTERVAL))
getnanouptime(&r->r_last_init);
else
goto error_set;
/* Ok, we're happy to accept this initiation now */
noise_remote_handshake_index_drop(r);
r->r_handshake = hs;
*rp = r;
ret = 0;
error_set:
rw_exit_write(&r->r_handshake_lock);
error:
rw_exit_read(&l->l_identity_lock);
explicit_bzero(key, NOISE_SYMMETRIC_SIZE);
explicit_bzero(&hs, sizeof(hs));
return ret;
}
int
noise_create_response(struct noise_remote *r, struct noise_response *resp)
{
struct noise_handshake *hs = &r->r_handshake;
uint8_t key[NOISE_SYMMETRIC_SIZE];
uint8_t e[NOISE_KEY_SIZE];
int ret = EINVAL;
rw_enter_read(&r->r_local->l_identity_lock);
rw_enter_write(&r->r_handshake_lock);
if (hs->hs_state != CONSUMED_INITIATION)
goto error;
/* e */
curve25519_generate_secret(e);
if (curve25519_generate_public(resp->ue, e) == 0)
goto error;
noise_msg_ephemeral(hs->hs_ck, hs->hs_hash, resp->ue);
/* ee */
if (noise_mix_dh(hs->hs_ck, NULL, e, hs->hs_e) != 0)
goto error;
/* se */
if (noise_mix_dh(hs->hs_ck, NULL, e, r->r_public) != 0)
goto error;
/* psk */
noise_mix_psk(hs->hs_ck, hs->hs_hash, key, r->r_psk);
/* {} */
noise_msg_encrypt(resp->en, NULL, 0, key, hs->hs_hash);
hs->hs_state = CREATED_RESPONSE;
hs->hs_local_index = noise_remote_handshake_index_get(r);
resp->r_idx = hs->hs_remote_index;
resp->s_idx = hs->hs_local_index;
ret = 0;
error:
rw_exit_write(&r->r_handshake_lock);
rw_exit_read(&r->r_local->l_identity_lock);
if (ret != 0)
explicit_bzero(resp, sizeof(*resp));
explicit_bzero(key, NOISE_SYMMETRIC_SIZE);
explicit_bzero(e, NOISE_KEY_SIZE);
return ret;
}
int
noise_consume_response(struct noise_remote *r, struct noise_response *resp)
{
struct noise_local *l = r->r_local;
struct noise_handshake hs;
uint8_t key[NOISE_SYMMETRIC_SIZE];
uint8_t preshared_key[NOISE_PSK_SIZE];
int ret = EINVAL;
rw_enter_read(&l->l_identity_lock);
if (!l->l_has_identity)
goto error;
rw_enter_read(&r->r_handshake_lock);
hs = r->r_handshake;
memcpy(preshared_key, r->r_psk, NOISE_PSK_SIZE);
rw_exit_read(&r->r_handshake_lock);
if (hs.hs_state != CREATED_INITIATION ||
hs.hs_local_index != resp->r_idx)
goto error;
/* e */
noise_msg_ephemeral(hs.hs_ck, hs.hs_hash, resp->ue);
/* ee */
if (noise_mix_dh(hs.hs_ck, NULL, hs.hs_e, resp->ue) != 0)
goto error;
/* se */
if (noise_mix_dh(hs.hs_ck, NULL, l->l_private, resp->ue) != 0)
goto error;
/* psk */
noise_mix_psk(hs.hs_ck, hs.hs_hash, key, preshared_key);
/* {} */
if (noise_msg_decrypt(NULL, resp->en,
0 + NOISE_MAC_SIZE, key, hs.hs_hash) != 0)
goto error;
hs.hs_remote_index = resp->s_idx;
rw_enter_write(&r->r_handshake_lock);
if (r->r_handshake.hs_state == hs.hs_state &&
r->r_handshake.hs_local_index == hs.hs_local_index) {
r->r_handshake = hs;
r->r_handshake.hs_state = CONSUMED_RESPONSE;
ret = 0;
}
rw_exit_write(&r->r_handshake_lock);
error:
rw_exit_read(&l->l_identity_lock);
explicit_bzero(&hs, sizeof(hs));
explicit_bzero(key, NOISE_SYMMETRIC_SIZE);
return ret;
}
int
noise_remote_begin_session(struct noise_remote *r)
{
struct noise_handshake *hs = &r->r_handshake;
struct noise_keypair kp, *next, *current, *previous;
rw_enter_write(&r->r_handshake_lock);
/* We now derive the keypair from the handshake */
if (hs->hs_state == CONSUMED_RESPONSE) {
kp.kp_is_initiator = 1;
noise_kdf(kp.kp_send, kp.kp_recv, NULL, NULL,
NOISE_SYMMETRIC_SIZE, NOISE_SYMMETRIC_SIZE, 0, 0,
hs->hs_ck);
} else if (hs->hs_state == CREATED_RESPONSE) {
kp.kp_is_initiator = 0;
noise_kdf(kp.kp_recv, kp.kp_send, NULL, NULL,
NOISE_SYMMETRIC_SIZE, NOISE_SYMMETRIC_SIZE, 0, 0,
hs->hs_ck);
} else {
rw_exit_write(&r->r_handshake_lock);
return EINVAL;
}
kp.kp_valid = 1;
kp.kp_local_index = hs->hs_local_index;
kp.kp_remote_index = hs->hs_remote_index;
getnanouptime(&kp.kp_birthdate);
bzero(&kp.kp_ctr, sizeof(kp.kp_ctr));
rw_init(&kp.kp_ctr.c_lock, "noise_counter");
/* Now we need to add_new_keypair */
rw_enter_write(&r->r_keypair_lock);
next = r->r_next;
current = r->r_current;
previous = r->r_previous;
if (kp.kp_is_initiator) {
if (next != NULL) {
r->r_next = NULL;
r->r_previous = next;
noise_remote_keypair_free(r, current);
} else {
r->r_previous = current;
}
noise_remote_keypair_free(r, previous);
r->r_current = noise_remote_keypair_allocate(r);
*r->r_current = kp;
} else {
noise_remote_keypair_free(r, next);
r->r_previous = NULL;
noise_remote_keypair_free(r, previous);
r->r_next = noise_remote_keypair_allocate(r);
*r->r_next = kp;
}
rw_exit_write(&r->r_keypair_lock);
explicit_bzero(&r->r_handshake, sizeof(r->r_handshake));
rw_exit_write(&r->r_handshake_lock);
explicit_bzero(&kp, sizeof(kp));
return 0;
}
void
noise_remote_clear(struct noise_remote *r)
{
rw_enter_write(&r->r_handshake_lock);
noise_remote_handshake_index_drop(r);
explicit_bzero(&r->r_handshake, sizeof(r->r_handshake));
rw_exit_write(&r->r_handshake_lock);
rw_enter_write(&r->r_keypair_lock);
noise_remote_keypair_free(r, r->r_next);
noise_remote_keypair_free(r, r->r_current);
noise_remote_keypair_free(r, r->r_previous);
rw_exit_write(&r->r_keypair_lock);
}
void
noise_remote_expire_current(struct noise_remote *r)
{
rw_enter_write(&r->r_keypair_lock);
if (r->r_next != NULL)
r->r_next->kp_valid = 0;
if (r->r_current != NULL)
r->r_current->kp_valid = 0;
rw_exit_write(&r->r_keypair_lock);
}
int
noise_remote_ready(struct noise_remote *r)
{
struct noise_keypair *kp;
int ret;
rw_enter_read(&r->r_keypair_lock);
/* kp_ctr isn't locked here, we're happy to accept a racy read. */
if ((kp = r->r_current) == NULL ||
!kp->kp_valid ||
noise_timer_expired(&kp->kp_birthdate, REJECT_AFTER_TIME, 0) ||
kp->kp_ctr.c_recv >= REJECT_AFTER_MESSAGES ||
kp->kp_ctr.c_send >= REJECT_AFTER_MESSAGES)
ret = EINVAL;
else
ret = 0;
rw_exit_read(&r->r_keypair_lock);
return ret;
}
int
noise_remote_encrypt(struct noise_remote *r, struct noise_data *data,
size_t len)
{
struct noise_keypair *kp;
uint64_t ctr;
int ret = EINVAL;
rw_enter_read(&r->r_keypair_lock);
if ((kp = r->r_current) == NULL)
goto error;
/* We confirm that our values are within our tolerances. We want:
* - a valid keypair
* - our keypair to be less than REJECT_AFTER_TIME seconds old
* - our receive counter to be less than REJECT_AFTER_MESSAGES
* - our send counter to be less than REJECT_AFTER_MESSAGES
*
* kp_ctr isn't locked here, we're happy to accept a racy read. */
if (!kp->kp_valid ||
noise_timer_expired(&kp->kp_birthdate, REJECT_AFTER_TIME, 0) ||
kp->kp_ctr.c_recv >= REJECT_AFTER_MESSAGES ||
((ctr = noise_counter_send(&kp->kp_ctr)) > REJECT_AFTER_MESSAGES))
goto error;
/* Ensure that our counter is little endian and then encrypt our
* payload. We encrypt into the same buffer, so the caller must ensure
* that buf has NOISE_MAC_SIZE bytes to store the MAC. The nonce and
* index are passed back out to the caller through the provided
* data pointer. */
data->nonce = htole64(ctr);
data->r_idx = kp->kp_remote_index;
chacha20poly1305_encrypt(data->buf, data->buf, len,
NULL, 0, data->nonce, kp->kp_send);
/* If our values are still within tolerances, but we are approaching
* the tolerances, we notify the caller with ESTALE that they should
* establish a new keypair. The current keypair can continue to be used
* until the tolerances are hit. We notify if:
* - our send counter is not less than REKEY_AFTER_MESSAGES
* - we're the initiator and our keypair is older than
* REKEY_AFTER_TIME seconds */
ret = ESTALE;
if (ctr >= REKEY_AFTER_MESSAGES)
goto error;
if (kp->kp_is_initiator &&
noise_timer_expired(&kp->kp_birthdate, REKEY_AFTER_TIME, 0))
goto error;
ret = 0;
error:
rw_exit_read(&r->r_keypair_lock);
return ret;
}
int
noise_remote_decrypt(struct noise_remote *r, struct noise_data *data,
size_t len)
{
struct noise_keypair *kp;
uint64_t ctr;
int ret = EINVAL;
/* We retrieve the keypair corresponding to the provided index. We
* attempt the current keypair first as that is most likely. We also
* want to make sure that the keypair is valid as it would be
* catastrophic to decrypt against a zero'ed keypair. */
rw_enter_read(&r->r_keypair_lock);
if (r->r_current != NULL && r->r_current->kp_local_index == data->r_idx) {
kp = r->r_current;
} else if (r->r_previous != NULL && r->r_previous->kp_local_index == data->r_idx) {
kp = r->r_previous;
} else if (r->r_next != NULL && r->r_next->kp_local_index == data->r_idx) {
kp = r->r_next;
} else {
goto error;
}
/* We confirm that our values are within our tolerances. These values
* are the same as the encrypt routine.
*
* kp_ctr isn't locked here, we're happy to accept a racy read. */
if (noise_timer_expired(&kp->kp_birthdate, REJECT_AFTER_TIME, 0) ||
kp->kp_ctr.c_send >= REJECT_AFTER_MESSAGES ||
kp->kp_ctr.c_recv >= REJECT_AFTER_MESSAGES)
goto error;
/* Convert the wire counter to host byte order for the replay check,
 * then decrypt, then validate the counter. We don't want to validate
 * the counter before decrypting, as we do not know the message is
 * authentic prior to decryption. */
ctr = letoh64(data->nonce);
if (chacha20poly1305_decrypt(data->buf, data->buf, len,
NULL, 0, data->nonce, kp->kp_recv) == 0)
goto error;
if (noise_counter_recv(&kp->kp_ctr, ctr) != 0)
goto error;
/* If we've received the handshake confirming data packet then move the
* next keypair into current. If we do slide the next keypair in, then
* we skip the REKEY_AFTER_TIME_RECV check. This is safe to do as a
* data packet can't confirm a session that we are an INITIATOR of. */
if (kp == r->r_next) {
rw_exit_read(&r->r_keypair_lock);
rw_enter_write(&r->r_keypair_lock);
if (kp == r->r_next && kp->kp_local_index == data->r_idx) {
noise_remote_keypair_free(r, r->r_previous);
r->r_previous = r->r_current;
r->r_current = r->r_next;
r->r_next = NULL;
ret = ECONNRESET;
goto error;
}
rw_downgrade(&r->r_keypair_lock);
}
/* Similar to when we encrypt, we want to notify the caller when we
* are approaching our tolerances. We notify if:
* - we're the initiator and the current keypair is older than
* REKEY_AFTER_TIME_RECV seconds. */
ret = ESTALE;
kp = r->r_current;
if (kp->kp_is_initiator &&
noise_timer_expired(&kp->kp_birthdate, REKEY_AFTER_TIME_RECV, 0))
goto error;
ret = 0;
error:
rw_exit(&r->r_keypair_lock);
return ret;
}
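
Note that noise_remote_encrypt and noise_remote_decrypt overload the return value with advisory states on top of hard failures: ESTALE and ECONNRESET both mean the packet was processed successfully. A fragment sketching a hypothetical receive path; the wg_* and *_packet helper names are assumptions, not this file's API:

int err = noise_remote_decrypt(r, data, len);
if (err == EINVAL) {
	drop_packet();				/* no keypair, bad tag, or replayed counter */
} else {
	deliver_packet();			/* 0, ESTALE and ECONNRESET all decrypted fine */
	if (err == ECONNRESET)
		wg_session_confirmed(r);	/* the "next" keypair was just promoted */
	else if (err == ESTALE)
		wg_begin_handshake(r);		/* rekey soon; current keys still usable */
}

The encrypt side is symmetric: on ESTALE the caller should still transmit the packet, but kick off a fresh handshake.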
/* Private functions - these should not be called outside this file under any
* circumstances. */
static struct noise_keypair *
noise_remote_keypair_allocate(struct noise_remote *r)
{
struct noise_keypair *kp;
kp = SLIST_FIRST(&r->r_unused_keypairs);
SLIST_REMOVE_HEAD(&r->r_unused_keypairs, kp_entry);
return kp;
}
static void
noise_remote_keypair_free(struct noise_remote *r, struct noise_keypair *kp)
{
struct noise_upcall *u = &r->r_local->l_upcall;
if (kp != NULL) {
SLIST_INSERT_HEAD(&r->r_unused_keypairs, kp, kp_entry);
u->u_index_drop(u->u_arg, kp->kp_local_index);
bzero(kp->kp_send, sizeof(kp->kp_send));
bzero(kp->kp_recv, sizeof(kp->kp_recv));
}
}
static uint32_t
noise_remote_handshake_index_get(struct noise_remote *r)
{
struct noise_upcall *u = &r->r_local->l_upcall;
return u->u_index_set(u->u_arg, r);
}
static void
noise_remote_handshake_index_drop(struct noise_remote *r)
{
struct noise_handshake *hs = &r->r_handshake;
struct noise_upcall *u = &r->r_local->l_upcall;
rw_assert(&r->r_handshake_lock, RA_WLOCKED);
if (hs->hs_state != HS_ZEROED)
u->u_index_drop(u->u_arg, hs->hs_local_index);
}
static uint64_t
noise_counter_send(struct noise_counter *ctr)
{
uint64_t ret;
rw_enter_write(&ctr->c_lock);
ret = ctr->c_send++;
rw_exit_write(&ctr->c_lock);
return ret;
}
static int
noise_counter_recv(struct noise_counter *ctr, uint64_t recv)
{
uint64_t i, top, index_recv, index_ctr;
COUNTER_TYPE bit;
int ret = EEXIST;
rw_enter_write(&ctr->c_lock);
/* Check that the recv counter is valid */
if (ctr->c_recv >= REJECT_AFTER_MESSAGES ||
recv >= REJECT_AFTER_MESSAGES)
goto error;
/* If the packet is out of the window, invalid */
if (recv + COUNTER_WINDOW_SIZE < ctr->c_recv)
goto error;
/* If the new counter is ahead of the current counter, we'll need to
* zero out the bitmap that has previously been used */
index_recv = recv / COUNTER_TYPE_BITS;
index_ctr = ctr->c_recv / COUNTER_TYPE_BITS;
if (recv > ctr->c_recv) {
top = MIN(index_recv - index_ctr, COUNTER_TYPE_NUM);
for (i = 1; i <= top; i++)
ctr->c_backtrack[
(i + index_ctr) & (COUNTER_TYPE_NUM - 1)] = 0;
ctr->c_recv = recv;
}
index_recv %= COUNTER_TYPE_NUM;
bit = ((COUNTER_TYPE)1) << (recv % COUNTER_TYPE_BITS);
if (ctr->c_backtrack[index_recv] & bit)
goto error;
ctr->c_backtrack[index_recv] |= bit;
ret = 0;
error:
rw_exit_write(&ctr->c_lock);
return ret;
}
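
The c_backtrack bitmap is the usual sliding-window anti-replay check (as in IPsec), spread over several words. A reduced, runnable model with a single 64-bit window makes the accept/replay/too-old cases easy to see; it omits the REJECT_AFTER_MESSAGES bound and the multi-word array:

#include <stdio.h>
#include <stdint.h>

struct toy_counter {
	uint64_t last;	/* highest counter accepted so far */
	uint64_t map;	/* bit i set => (last - i) already seen */
};

static int
toy_recv(struct toy_counter *c, uint64_t recv)
{
	if (recv > c->last) {		/* ahead of the window: slide it */
		uint64_t shift = recv - c->last;
		c->map = (shift >= 64) ? 0 : c->map << shift;
		c->map |= 1;
		c->last = recv;
		return 0;
	}
	uint64_t behind = c->last - recv;
	if (behind >= 64)
		return -1;		/* fell out of the window: too old */
	if (c->map & (1ULL << behind))
		return -1;		/* bit already set: replay */
	c->map |= 1ULL << behind;
	return 0;
}

int
main(void)
{
	struct toy_counter c = { 0, 0 };
	uint64_t seq[] = { 1, 3, 2, 3, 100, 30 };

	for (size_t i = 0; i < sizeof(seq) / sizeof(seq[0]); i++)
		printf("recv %3llu -> %s\n", (unsigned long long)seq[i],
		    toy_recv(&c, seq[i]) == 0 ? "accept" : "drop");
	return 0;
}

The second 3 is rejected as a replay, and 30 is rejected because 100 slid the window past it.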
static void
noise_kdf(uint8_t *a, uint8_t *b, uint8_t *c, const uint8_t *x,
size_t a_len, size_t b_len, size_t c_len, size_t x_len,
const uint8_t ck[NOISE_HASH_SIZE])
{
uint8_t out[BLAKE2S_HASH_SIZE + 1];
uint8_t sec[BLAKE2S_HASH_SIZE];
ASSERT(a_len <= BLAKE2S_HASH_SIZE && b_len <= BLAKE2S_HASH_SIZE &&
c_len <= BLAKE2S_HASH_SIZE);
ASSERT(!(b || b_len || c || c_len) || (a && a_len));
ASSERT(!(c || c_len) || (b && b_len));
/* Extract entropy from "x" into sec */
blake2s_hmac(sec, x, ck, BLAKE2S_HASH_SIZE, x_len, NOISE_HASH_SIZE);
if (a == NULL || a_len == 0)
goto out;
/* Expand first key: key = sec, data = 0x1 */
out[0] = 1;
blake2s_hmac(out, out, sec, BLAKE2S_HASH_SIZE, 1, BLAKE2S_HASH_SIZE);
memcpy(a, out, a_len);
if (b == NULL || b_len == 0)
goto out;
/* Expand second key: key = sec, data = "a" || 0x2 */
out[BLAKE2S_HASH_SIZE] = 2;
blake2s_hmac(out, out, sec, BLAKE2S_HASH_SIZE, BLAKE2S_HASH_SIZE + 1,
BLAKE2S_HASH_SIZE);
memcpy(b, out, b_len);
if (c == NULL || c_len == 0)
goto out;
/* Expand third key: key = sec, data = "b" || 0x3 */
out[BLAKE2S_HASH_SIZE] = 3;
blake2s_hmac(out, out, sec, BLAKE2S_HASH_SIZE, BLAKE2S_HASH_SIZE + 1,
BLAKE2S_HASH_SIZE);
memcpy(c, out, c_len);
out:
/* Clear sensitive data from stack */
explicit_bzero(sec, BLAKE2S_HASH_SIZE);
explicit_bzero(out, BLAKE2S_HASH_SIZE + 1);
}
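
noise_kdf is HKDF (RFC 5869) instantiated with HMAC-BLAKE2s: a single extract step followed by up to three chained expand steps. With T1..T3 denoting the full-width intermediate outputs, the code above computes:

sec = HMAC(ck, x)               /* extract */
T1  = HMAC(sec, 0x01)           a = first a_len bytes of T1
T2  = HMAC(sec, T1 || 0x02)     b = first b_len bytes of T2
T3  = HMAC(sec, T2 || 0x03)     c = first c_len bytes of T3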
static int
noise_mix_dh(uint8_t ck[NOISE_HASH_SIZE], uint8_t key[NOISE_SYMMETRIC_SIZE],
const uint8_t private[NOISE_KEY_SIZE],
const uint8_t public[NOISE_KEY_SIZE])
{
uint8_t dh[NOISE_KEY_SIZE];
if (!curve25519(dh, private, public))
return EINVAL;
noise_kdf(ck, key, NULL, dh,
NOISE_HASH_SIZE, NOISE_SYMMETRIC_SIZE, 0, NOISE_KEY_SIZE, ck);
explicit_bzero(dh, NOISE_KEY_SIZE);
return 0;
}
static int
noise_mix_ss(uint8_t ck[NOISE_HASH_SIZE], uint8_t key[NOISE_SYMMETRIC_SIZE],
const uint8_t ss[NOISE_KEY_SIZE])
{
static uint8_t null_point[NOISE_KEY_SIZE];
if (timingsafe_bcmp(ss, null_point, NOISE_KEY_SIZE) == 0)
return ENOENT;
noise_kdf(ck, key, NULL, ss,
NOISE_HASH_SIZE, NOISE_SYMMETRIC_SIZE, 0, NOISE_KEY_SIZE, ck);
return 0;
}
static void
noise_mix_hash(uint8_t hash[NOISE_HASH_SIZE], const uint8_t *src,
size_t src_len)
{
struct blake2s_state blake;
blake2s_init(&blake, NOISE_HASH_SIZE);
blake2s_update(&blake, hash, NOISE_HASH_SIZE);
blake2s_update(&blake, src, src_len);
blake2s_final(&blake, hash, NOISE_HASH_SIZE);
}
static void
noise_mix_psk(uint8_t ck[NOISE_HASH_SIZE], uint8_t hash[NOISE_HASH_SIZE],
uint8_t key[NOISE_SYMMETRIC_SIZE], const uint8_t psk[NOISE_KEY_SIZE])
{
uint8_t tmp[NOISE_HASH_SIZE];
noise_kdf(ck, tmp, key, psk,
NOISE_HASH_SIZE, NOISE_HASH_SIZE, NOISE_SYMMETRIC_SIZE,
NOISE_PSK_SIZE, ck);
noise_mix_hash(hash, tmp, NOISE_HASH_SIZE);
explicit_bzero(tmp, NOISE_HASH_SIZE);
}
static void
noise_param_init(uint8_t ck[NOISE_HASH_SIZE], uint8_t hash[NOISE_HASH_SIZE],
const uint8_t s[NOISE_KEY_SIZE])
{
struct blake2s_state blake;
blake2s(ck, (uint8_t *)NOISE_HANDSHAKE_NAME, NULL,
NOISE_HASH_SIZE, strlen(NOISE_HANDSHAKE_NAME), 0);
blake2s_init(&blake, NOISE_HASH_SIZE);
blake2s_update(&blake, ck, NOISE_HASH_SIZE);
blake2s_update(&blake, (uint8_t *)NOISE_IDENTIFIER_NAME,
strlen(NOISE_IDENTIFIER_NAME));
blake2s_final(&blake, hash, NOISE_HASH_SIZE);
noise_mix_hash(hash, s, NOISE_KEY_SIZE);
}
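
For reference, the two protocol strings are fixed by the WireGuard specification (they are defined in a header outside this hunk), so every handshake starts from:

NOISE_HANDSHAKE_NAME  = "Noise_IKpsk2_25519_ChaChaPoly_BLAKE2s"
NOISE_IDENTIFIER_NAME = "WireGuard v1 zx2c4 Jason@zx2c4.com"

ck = BLAKE2s(NOISE_HANDSHAKE_NAME)
h  = BLAKE2s(BLAKE2s(ck || NOISE_IDENTIFIER_NAME) || s)	/* s: responder static public key */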
static void
noise_msg_encrypt(uint8_t *dst, const uint8_t *src, size_t src_len,
uint8_t key[NOISE_SYMMETRIC_SIZE], uint8_t hash[NOISE_HASH_SIZE])
{
/* Nonce always zero for Noise_IK */
chacha20poly1305_encrypt(dst, src, src_len,
hash, NOISE_HASH_SIZE, 0, key);
noise_mix_hash(hash, dst, src_len + NOISE_MAC_SIZE);
}
static int
noise_msg_decrypt(uint8_t *dst, const uint8_t *src, size_t src_len,
uint8_t key[NOISE_SYMMETRIC_SIZE], uint8_t hash[NOISE_HASH_SIZE])
{
/* Nonce always zero for Noise_IK */
if (!chacha20poly1305_decrypt(dst, src, src_len,
hash, NOISE_HASH_SIZE, 0, key))
return EINVAL;
noise_mix_hash(hash, src, src_len);
return 0;
}
static void
noise_msg_ephemeral(uint8_t ck[NOISE_HASH_SIZE], uint8_t hash[NOISE_HASH_SIZE],
const uint8_t src[NOISE_KEY_SIZE])
{
noise_mix_hash(hash, src, NOISE_KEY_SIZE);
noise_kdf(ck, NULL, NULL, src, NOISE_HASH_SIZE, 0, 0, NOISE_KEY_SIZE, ck);
}
static void
noise_tai64n_now(uint8_t output[NOISE_TIMESTAMP_SIZE])
{
struct timespec time;
getnanotime(&time);
/* Round down the nsec counter to limit precise timing leak. */
time.tv_nsec &= REJECT_INTERVAL_MASK;
/* https://cr.yp.to/libtai/tai64.html */
*(uint64_t *)output = htobe64(0x400000000000000aULL + time.tv_sec);
*(uint32_t *)(output + sizeof(uint64_t)) = htobe32(time.tv_nsec);
}
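
The 12-byte label is the 64-bit TAI seconds value offset by 2^62 plus the 10-second TAI-UTC difference at the 1970 epoch, big endian, followed by a 32-bit big-endian nanosecond field. A tiny userland check, using memcpy instead of the pointer casts above to sidestep alignment concerns (htobe64/htobe32 come from <sys/endian.h> on FreeBSD, <endian.h> on glibc):

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <sys/endian.h>

int
main(void)
{
	uint8_t out[12];
	uint64_t secs = htobe64(0x400000000000000aULL + 0);	/* tv_sec = 0 */
	uint32_t nsec = htobe32(0);

	memcpy(out, &secs, sizeof(secs));
	memcpy(out + sizeof(secs), &nsec, sizeof(nsec));
	for (size_t i = 0; i < sizeof(out); i++)
		printf("%02x", out[i]);
	printf("\n");	/* prints 400000000000000a00000000 */
	return 0;
}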
static int
noise_timer_expired(struct timespec *birthdate, time_t sec, long nsec)
{
struct timespec uptime;
struct timespec expire = { .tv_sec = sec, .tv_nsec = nsec };
/* We don't check for a zeroed birthdate here, to avoid the extra
 * branch on every encrypt/decrypt. This does mean the r_last_init
 * check may misfire while system uptime is still within
 * REJECT_INTERVAL of 0. */
getnanouptime(&uptime);
timespecadd(birthdate, &expire, &expire);
return timespeccmp(&uptime, &expire, >) ? ETIMEDOUT : 0;
}


@ -817,3 +817,16 @@ void
taskqgroup_destroy(struct taskqgroup *qgroup)
{
}
void
taskqgroup_drain_all(struct taskqgroup *tqg)
{
struct gtaskqueue *q;
for (int i = 0; i < mp_ncpus; i++) {
q = tqg->tqg_queue[i].tgc_taskq;
if (q == NULL)
continue;
gtaskqueue_drain_all(q);
}
}


@ -162,6 +162,7 @@ SUBDIR= \
if_tuntap \
if_vlan \
if_vxlan \
if_wg \
iflib \
${_iir} \
imgact_binmisc \


@ -0,0 +1,41 @@
# $FreeBSD$
KMOD= if_wg
INCDIR= ${SRCTOP}/sys/dev/if_wg/include
ZINCDIR= ${SRCTOP}/sys/dev/if_wg/module/crypto/zinc
.PATH: ${SRCTOP}/sys/dev/if_wg/module
.PATH: ${ZINCDIR}
.PATH: ${ZINCDIR}/chacha20
.PATH: ${ZINCDIR}/poly1305
CFLAGS+= -I${INCDIR}
CFLAGS+= -D__KERNEL__
CFLAGS+= -ferror-limit=7
DEBUG_FLAGS=-g
SRCS= opt_inet.h opt_inet6.h device_if.h bus_if.h ifdi_if.h
SRCS+= if_wg_session.c module.c
SRCS+= wg_noise.c wg_cookie.c
SRCS+= curve25519.c blake2s.c
SRCS+= chacha20poly1305.c chacha20.c poly1305.c
.if ${MACHINE_ARCH} == "amd64"
SRCS += poly1305-x86_64.S chacha20-x86_64.S
SIMD_FLAGS = -DCONFIG_AS_SSSE3=1 -DCONFIG_AS_AVX=1 \
-DCONFIG_AS_AVX512=1 -DCONFIG_AS_AVX2=1
.endif
.include <bsd.kmod.mk>
.if ${MACHINE_ARCH} == "amd64"
CFLAGS.poly1305-x86_64.S = -D__LOCORE -gdwarf-4 ${SIMD_FLAGS} -include ${INCDIR}/sys/support.h
CFLAGS.chacha20-x86_64.S = -D__LOCORE -gdwarf-4 ${SIMD_FLAGS} -include ${INCDIR}/sys/support.h
CFLAGS.chacha20poly1305.c = -DCONFIG_ZINC_ARCH_X86_64
CFLAGS.chacha20.c = -DCONFIG_ZINC_ARCH_X86_64
CFLAGS.poly1305.c = -DCONFIG_ZINC_ARCH_X86_64
.endif


@ -269,7 +269,6 @@ iflib_clone_register(if_shared_ctx_t sctx)
printf("clone_simple failed -- cloned %s devices will not be available\n", sctx->isc_name);
goto fail_clone;
}
ifc_flags_set(ip->ip_ifc, IFC_NOGROUP);
ip->ip_lladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event,
iflib_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
if (ip->ip_lladdr_tag == NULL)


@ -80,6 +80,7 @@ void taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask);
struct taskqgroup *taskqgroup_create(const char *name, int cnt, int stride);
void taskqgroup_destroy(struct taskqgroup *qgroup);
void taskqgroup_bind(struct taskqgroup *qgroup);
void taskqgroup_drain_all(struct taskqgroup *qgroup);
#define GTASK_INIT(gtask, flags, priority, func, context) do { \
(gtask)->ta_flags = flags; \