Import kernel WireGuard support
Data path largely shared with the OpenBSD implementation by Matt Dunwoodie <ncon@noconroy.net> Reviewed by: grehan@freebsd.org MFC after: 1 month Sponsored by: Rubicon LLC, (Netgate) Differential Revision: https://reviews.freebsd.org/D26137
This commit is contained in:
parent
baa2cd58a6
commit
2338da0373
@ -35,6 +35,7 @@ SRCS+= ifvxlan.c # VXLAN support
|
||||
SRCS+= ifgre.c # GRE keys etc
|
||||
SRCS+= ifgif.c # GIF reversed header workaround
|
||||
SRCS+= ifipsec.c # IPsec VTI
|
||||
SRCS+= ifwg.c # Wireguard
|
||||
|
||||
SRCS+= sfp.c # SFP/SFP+ information
|
||||
LIBADD+= ifconfig m util
|
||||
@ -68,6 +69,7 @@ CFLAGS+= -DINET
|
||||
CFLAGS+= -DJAIL
|
||||
LIBADD+= jail
|
||||
.endif
|
||||
LIBADD+= nv
|
||||
|
||||
MAN= ifconfig.8
|
||||
|
||||
|
618
sbin/ifconfig/ifwg.c
Normal file
618
sbin/ifconfig/ifwg.c
Normal file
@ -0,0 +1,618 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 Rubicon Communications, LLC (Netgate)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#ifndef RESCUE
|
||||
#include <sys/param.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/nv.h>
|
||||
|
||||
#include <net/ethernet.h>
|
||||
#include <net/if.h>
|
||||
#include <net/if_dl.h>
|
||||
#include <net/if_types.h>
|
||||
#include <net/if_media.h>
|
||||
#include <net/route.h>
|
||||
|
||||
#include <netinet/in.h>
|
||||
#include <arpa/inet.h>
|
||||
|
||||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
#include <err.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <netdb.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <stdarg.h>
|
||||
#include <stddef.h> /* NB: for offsetof */
|
||||
#include <locale.h>
|
||||
#include <langinfo.h>
|
||||
#include <resolv.h>
|
||||
|
||||
#include "ifconfig.h"
|
||||
|
||||
typedef enum {
|
||||
WGC_GET = 0x5,
|
||||
WGC_SET = 0x6,
|
||||
} wg_cmd_t;
|
||||
|
||||
static nvlist_t *nvl_params;
|
||||
static bool do_peer;
|
||||
static int allowed_ips_count;
|
||||
static int allowed_ips_max;
|
||||
struct allowedip {
|
||||
struct sockaddr_storage a_addr;
|
||||
struct sockaddr_storage a_mask;
|
||||
};
|
||||
struct allowedip *allowed_ips;
|
||||
|
||||
#define ALLOWEDIPS_START 16
|
||||
#define WG_KEY_LEN 32
|
||||
#define WG_KEY_LEN_BASE64 ((((WG_KEY_LEN) + 2) / 3) * 4 + 1)
|
||||
#define WG_KEY_LEN_HEX (WG_KEY_LEN * 2 + 1)
|
||||
#define WG_MAX_STRLEN 64
|
||||
|
||||
static bool
|
||||
key_from_base64(uint8_t key[static WG_KEY_LEN], const char *base64)
|
||||
{
|
||||
|
||||
if (strlen(base64) != WG_KEY_LEN_BASE64 - 1) {
|
||||
warnx("bad key len - need %d got %zu\n", WG_KEY_LEN_BASE64 - 1, strlen(base64));
|
||||
return false;
|
||||
}
|
||||
if (base64[WG_KEY_LEN_BASE64 - 2] != '=') {
|
||||
warnx("bad key terminator, expected '=' got '%c'", base64[WG_KEY_LEN_BASE64 - 2]);
|
||||
return false;
|
||||
}
|
||||
return (b64_pton(base64, key, WG_KEY_LEN));
|
||||
}
|
||||
|
||||
/*
 * Parse an endpoint of the form "host:port" or "[v6-address]:port", resolve
 * it with getaddrinfo() and store the first resulting socket address in
 * nvl_params under the "endpoint" key.
 *
 * Exits via errx() when the string has no ':' port delimiter, when a leading
 * '[' has no matching ']', when memory cannot be allocated or when the
 * lookup fails.
 */
static void
parse_endpoint(const char *endpoint_)
{
	struct addrinfo hints, *res;
	char *copy, *host, *port, *sep, *bracket;
	int error;

	/* Work on a private copy: the separators are overwritten with NULs. */
	if ((copy = strdup(endpoint_)) == NULL)
		errx(1, "failed to allocate memory for endpoint");
	host = copy;

	/* The port follows the last ':' so that IPv6 literals keep theirs. */
	sep = strrchr(host, ':');
	if (sep == NULL)
		errx(1, "bad endpoint format %s - no port delimiter found", endpoint_);
	*sep = '\0';
	port = sep + 1;

	/* [::]:<> */
	if (host[0] == '[') {
		host++;
		bracket = strchr(host, ']');
		if (bracket == NULL) {
			/* Report the caller's string, not the truncated copy. */
			errx(1, "bad endpoint format %s - '[' found with no matching ']'", endpoint_);
		}
		*bracket = '\0';
	}

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	error = getaddrinfo(host, port, &hints, &res);
	if (error != 0)
		errx(1, "%s", gai_strerror(error));
	nvlist_add_binary(nvl_params, "endpoint", res->ai_addr, res->ai_addrlen);
	freeaddrinfo(res);
	free(copy);
}
|
||||
|
||||
/*
 * Fill *mask with an IPv4 netmask whose first len bits are set.
 *
 * Bytes are written in memory order, so the first byte of *mask holds the
 * most significant bits of the mask.  A len larger than the number of bits
 * in *mask is clamped to that number instead of writing past the structure.
 */
static void
in_len2mask(struct in_addr *mask, u_int len)
{
	const u_int maxbits = sizeof(*mask) * NBBY;
	u_char *p = (u_char *)mask;
	u_int i;

	if (len > maxbits)
		len = maxbits;

	memset(mask, 0, sizeof(*mask));
	/* Whole bytes first ... */
	for (i = 0; i < len / NBBY; i++)
		p[i] = 0xff;
	/* ... then the leading bits of the partially covered byte, if any. */
	if (len % NBBY)
		p[i] = (0xff00 >> (len % NBBY)) & 0xff;
}
|
||||
|
||||
/*
 * Return the number of leading one bits in the IPv4 netmask *mask, scanning
 * its bytes in memory order.  Bits after the first zero bit are not
 * examined.
 */
static u_int
in_mask2len(struct in_addr *mask)
{
	const u_char *bytes = (const u_char *)mask;
	u_int full = 0, bits = 0;

	/* Skip the bytes that are entirely ones. */
	while (full < sizeof(*mask) && bytes[full] == 0xff)
		full++;

	/* Count the leading one bits of the first byte that is not 0xff. */
	if (full < sizeof(*mask)) {
		while (bits < NBBY && (bytes[full] & (0x80 >> bits)) != 0)
			bits++;
	}
	return full * NBBY + bits;
}
|
||||
|
||||
/*
 * Fill *maskp with an IPv6 netmask whose first len bits are set.
 *
 * len must be in the range 0..128; any other value terminates the program
 * through errx().
 */
static void
in6_prefixlen2mask(struct in6_addr *maskp, int len)
{
	/* maskarray[n - 1] has the n most significant bits of a byte set. */
	static const u_char maskarray[NBBY] = {0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff};
	int bytelen, bitlen, i;

	/* sanity check; errx() does not return and adds its own newline */
	if (len < 0 || len > 128)
		errx(1, "in6_prefixlen2mask: invalid prefix length(%d)", len);

	memset(maskp, 0, sizeof(*maskp));
	bytelen = len / NBBY;
	bitlen = len % NBBY;
	for (i = 0; i < bytelen; i++)
		maskp->s6_addr[i] = 0xff;
	/* bitlen != 0 implies len < 128, so bytelen indexes inside s6_addr. */
	if (bitlen)
		maskp->s6_addr[bytelen] = maskarray[bitlen - 1];
}
|
||||
|
||||
/*
 * Convert the IPv6 netmask *mask into a prefix length.
 *
 * Scanning stops at lim0; when lim0 is NULL or lies more than
 * sizeof(*mask) bytes past the start of the mask, the whole structure is
 * scanned instead.  Returns the number of leading one bits, or -1 when a
 * bit is set after the first zero bit (with the exception visible in the
 * code: if the first non-0xff byte starts with a zero bit, the remaining
 * bits of that particular byte are not checked).
 */
static int
in6_mask2len(struct in6_addr *mask, u_char *lim0)
{
	u_char *first = (u_char *)mask;
	u_char *end = lim0;
	u_char *cur;
	int nbytes, nbits;

	/* ignore the scope_id part */
	if (lim0 == NULL || lim0 - first > sizeof(*mask))
		end = first + sizeof(*mask);

	/* Leading bytes that are all ones. */
	nbytes = 0;
	for (cur = first; cur < end && *cur == 0xff; cur++)
		nbytes++;

	/* Every scanned byte was 0xff: nothing is left to validate. */
	if (cur >= end)
		return nbytes * NBBY;

	/* Leading one bits of the first byte that is not 0xff. */
	nbits = 0;
	while (nbits < NBBY && (*cur & (0x80 >> nbits)) != 0)
		nbits++;

	/* The rest of that byte and all following bytes must be zero. */
	if (nbits != 0 && (*cur & (0x00ff >> nbits)) != 0)
		return -1;
	for (cur++; cur < end; cur++) {
		if (*cur != 0)
			return -1;
	}

	return nbytes * NBBY + nbits;
}
|
||||
|
||||
/*
 * Convert the numeric address string value into a socket address and store
 * it in aip->a_addr, which is zeroed first.  Host names are not resolved
 * (AI_NUMERICHOST).  A conversion failure terminates the program through
 * errx(); otherwise true is returned.
 */
static bool
parse_ip(struct allowedip *aip, const char *value)
{
	struct addrinfo request;
	struct addrinfo *answer;
	int rc;

	memset(&aip->a_addr, 0, sizeof(aip->a_addr));

	memset(&request, 0, sizeof(request));
	request.ai_flags = AI_NUMERICHOST;
	request.ai_family = AF_UNSPEC;

	rc = getaddrinfo(value, NULL, &request, &answer);
	if (rc != 0)
		errx(1, "%s", gai_strerror(rc));

	/* Only the first result is used. */
	memcpy(&aip->a_addr, answer->ai_addr, answer->ai_addrlen);
	freeaddrinfo(answer);

	return (true);
}
|
||||
|
||||
static void
|
||||
sa_ntop(const struct sockaddr *sa, char *buf, int *port)
|
||||
{
|
||||
const struct sockaddr_in *sin;
|
||||
const struct sockaddr_in6 *sin6;
|
||||
int err;
|
||||
|
||||
err = getnameinfo(sa, sa->sa_len, buf, INET6_ADDRSTRLEN, NULL,
|
||||
0, NI_NUMERICHOST);
|
||||
|
||||
if (sa->sa_family == AF_INET) {
|
||||
sin = (const struct sockaddr_in *)sa;
|
||||
if (port)
|
||||
*port = sin->sin_port;
|
||||
} else if (sa->sa_family == AF_INET6) {
|
||||
sin6 = (const struct sockaddr_in6 *)sa;
|
||||
if (port)
|
||||
*port = sin6->sin6_port;
|
||||
}
|
||||
|
||||
if (err)
|
||||
errx(1, "%s", gai_strerror(err));
|
||||
}
|
||||
|
||||
static void
|
||||
dump_peer(const nvlist_t *nvl_peer)
|
||||
{
|
||||
const void *key;
|
||||
const struct allowedip *aips;
|
||||
const struct sockaddr *endpoint;
|
||||
char outbuf[WG_MAX_STRLEN];
|
||||
char addr_buf[INET6_ADDRSTRLEN];
|
||||
size_t size;
|
||||
int count, port;
|
||||
|
||||
printf("[Peer]\n");
|
||||
if (nvlist_exists_binary(nvl_peer, "public-key")) {
|
||||
key = nvlist_get_binary(nvl_peer, "public-key", &size);
|
||||
b64_ntop((const uint8_t *)key, size, outbuf, WG_MAX_STRLEN);
|
||||
printf("PublicKey = %s\n", outbuf);
|
||||
}
|
||||
if (nvlist_exists_binary(nvl_peer, "endpoint")) {
|
||||
endpoint = nvlist_get_binary(nvl_peer, "endpoint", &size);
|
||||
sa_ntop(endpoint, addr_buf, &port);
|
||||
printf("Endpoint = %s:%d\n", addr_buf, ntohs(port));
|
||||
}
|
||||
|
||||
if (!nvlist_exists_binary(nvl_peer, "allowed-ips"))
|
||||
return;
|
||||
aips = nvlist_get_binary(nvl_peer, "allowed-ips", &size);
|
||||
if (size == 0 || size % sizeof(struct allowedip) != 0) {
|
||||
errx(1, "size %zu not integer multiple of allowedip", size);
|
||||
}
|
||||
printf("AllowedIPs = ");
|
||||
count = size / sizeof(struct allowedip);
|
||||
for (int i = 0; i < count; i++) {
|
||||
int mask;
|
||||
sa_family_t family;
|
||||
void *bitmask;
|
||||
struct sockaddr *sa;
|
||||
|
||||
sa = __DECONST(void *, &aips[i].a_addr);
|
||||
bitmask = __DECONST(void *,
|
||||
((const struct sockaddr *)&aips->a_mask)->sa_data);
|
||||
family = aips[i].a_addr.ss_family;
|
||||
getnameinfo(sa, sa->sa_len, addr_buf, INET6_ADDRSTRLEN, NULL,
|
||||
0, NI_NUMERICHOST);
|
||||
if (family == AF_INET)
|
||||
mask = in_mask2len(bitmask);
|
||||
else if (family == AF_INET6)
|
||||
mask = in6_mask2len(bitmask, NULL);
|
||||
else
|
||||
errx(1, "bad family in peer %d\n", family);
|
||||
printf("%s/%d", addr_buf, mask);
|
||||
if (i < count -1)
|
||||
printf(", ");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
static int
|
||||
get_nvl_out_size(int sock, u_long op, size_t *size)
|
||||
{
|
||||
struct ifdrv ifd;
|
||||
int err;
|
||||
|
||||
memset(&ifd, 0, sizeof(ifd));
|
||||
|
||||
strlcpy(ifd.ifd_name, name, sizeof(ifd.ifd_name));
|
||||
ifd.ifd_cmd = op;
|
||||
ifd.ifd_len = 0;
|
||||
ifd.ifd_data = NULL;
|
||||
|
||||
err = ioctl(sock, SIOCGDRVSPEC, &ifd);
|
||||
if (err)
|
||||
return (err);
|
||||
*size = ifd.ifd_len;
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
do_cmd(int sock, u_long op, void *arg, size_t argsize, int set)
|
||||
{
|
||||
struct ifdrv ifd;
|
||||
|
||||
memset(&ifd, 0, sizeof(ifd));
|
||||
|
||||
strlcpy(ifd.ifd_name, name, sizeof(ifd.ifd_name));
|
||||
ifd.ifd_cmd = op;
|
||||
ifd.ifd_len = argsize;
|
||||
ifd.ifd_data = arg;
|
||||
|
||||
return (ioctl(sock, set ? SIOCSDRVSPEC : SIOCGDRVSPEC, &ifd));
|
||||
}
|
||||
|
||||
static
|
||||
DECL_CMD_FUNC(peerlist, val, d)
|
||||
{
|
||||
size_t size, peercount;
|
||||
void *packed;
|
||||
const nvlist_t *nvl, *nvl_peer;
|
||||
const nvlist_t *const *nvl_peerlist;
|
||||
|
||||
if (get_nvl_out_size(s, WGC_GET, &size))
|
||||
errx(1, "can't get peer list size");
|
||||
if ((packed = malloc(size)) == NULL)
|
||||
errx(1, "malloc failed for peer list");
|
||||
if (do_cmd(s, WGC_GET, packed, size, 0))
|
||||
errx(1, "failed to obtain peer list");
|
||||
|
||||
nvl = nvlist_unpack(packed, size, 0);
|
||||
if (!nvlist_exists_nvlist_array(nvl, "peer-list"))
|
||||
return;
|
||||
nvl_peerlist = nvlist_get_nvlist_array(nvl, "peer-list", &peercount);
|
||||
|
||||
for (int i = 0; i < peercount; i++, nvl_peerlist++) {
|
||||
nvl_peer = *nvl_peerlist;
|
||||
dump_peer(nvl_peer);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
peerfinish(int s, void *arg)
|
||||
{
|
||||
nvlist_t *nvl, **nvl_array;
|
||||
void *packed;
|
||||
size_t size;
|
||||
|
||||
if ((nvl = nvlist_create(0)) == NULL)
|
||||
errx(1, "failed to allocate nvlist");
|
||||
if ((nvl_array = calloc(sizeof(void *), 1)) == NULL)
|
||||
errx(1, "failed to allocate nvl_array");
|
||||
if (!nvlist_exists_binary(nvl_params, "public-key"))
|
||||
errx(1, "must specify a public-key for adding peer");
|
||||
if (!nvlist_exists_binary(nvl_params, "endpoint"))
|
||||
errx(1, "must specify an endpoint for adding peer");
|
||||
if (allowed_ips_count == 0)
|
||||
errx(1, "must specify at least one range of allowed-ips to add a peer");
|
||||
|
||||
nvl_array[0] = nvl_params;
|
||||
nvlist_add_nvlist_array(nvl, "peer-list", (const nvlist_t * const *)nvl_array, 1);
|
||||
packed = nvlist_pack(nvl, &size);
|
||||
|
||||
if (do_cmd(s, WGC_SET, packed, size, true))
|
||||
errx(1, "failed to install peer");
|
||||
}
|
||||
|
||||
static
|
||||
DECL_CMD_FUNC(peerstart, val, d)
|
||||
{
|
||||
do_peer = true;
|
||||
callback_register(peerfinish, NULL);
|
||||
allowed_ips = malloc(ALLOWEDIPS_START * sizeof(struct allowedip));
|
||||
allowed_ips_max = ALLOWEDIPS_START;
|
||||
if (allowed_ips == NULL)
|
||||
errx(1, "failed to allocate array for allowedips");
|
||||
}
|
||||
|
||||
/*
 * "listen-port" command: let getaddrinfo() translate val into a port and
 * store it, in host byte order, as the "listen-port" number of nvl_params.
 * A lookup failure or an unexpected address family terminates the program.
 */
static
DECL_CMD_FUNC(setwglistenport, val, d)
{
	struct addrinfo hints, *res;
	u_long ul = 0;
	int error;

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	hints.ai_flags = AI_NUMERICHOST;
	error = getaddrinfo(NULL, val, &hints, &res);
	if (error != 0)
		errx(1, "%s", gai_strerror(error));

	/* Only the port of the first result is of interest. */
	switch (res->ai_family) {
	case AF_INET:
		ul = ((const struct sockaddr_in *)res->ai_addr)->sin_port;
		break;
	case AF_INET6:
		ul = ((const struct sockaddr_in6 *)res->ai_addr)->sin6_port;
		break;
	default:
		errx(1, "unknown family");
	}
	/* The result list is allocated by getaddrinfo() and must be freed. */
	freeaddrinfo(res);

	ul = ntohs((u_short)ul);
	nvlist_add_number(nvl_params, "listen-port", ul);
}
|
||||
|
||||
static
|
||||
DECL_CMD_FUNC(setwgprivkey, val, d)
|
||||
{
|
||||
uint8_t key[WG_KEY_LEN];
|
||||
|
||||
if (!key_from_base64(key, val))
|
||||
errx(1, "invalid key %s", val);
|
||||
nvlist_add_binary(nvl_params, "private-key", key, WG_KEY_LEN);
|
||||
}
|
||||
|
||||
static
|
||||
DECL_CMD_FUNC(setwgpubkey, val, d)
|
||||
{
|
||||
uint8_t key[WG_KEY_LEN];
|
||||
|
||||
if (!do_peer)
|
||||
errx(1, "setting public key only valid when adding peer");
|
||||
|
||||
if (!key_from_base64(key, val))
|
||||
errx(1, "invalid key %s", val);
|
||||
nvlist_add_binary(nvl_params, "public-key", key, WG_KEY_LEN);
|
||||
}
|
||||
|
||||
static
|
||||
DECL_CMD_FUNC(setallowedips, val, d)
|
||||
{
|
||||
char *base, *allowedip, *mask;
|
||||
u_long ul;
|
||||
char *endp;
|
||||
struct allowedip *aip;
|
||||
|
||||
if (!do_peer)
|
||||
errx(1, "setting allowed ip only valid when adding peer");
|
||||
if (allowed_ips_count == allowed_ips_max) {
|
||||
/* XXX grow array */
|
||||
}
|
||||
aip = &allowed_ips[allowed_ips_count];
|
||||
base = allowedip = strdup(val);
|
||||
mask = index(allowedip, '/');
|
||||
if (mask == NULL)
|
||||
errx(1, "mask separator not found in allowedip %s", val);
|
||||
*mask = '\0';
|
||||
mask++;
|
||||
parse_ip(aip, allowedip);
|
||||
ul = strtoul(mask, &endp, 0);
|
||||
if (*endp != '\0')
|
||||
errx(1, "invalid value for allowedip mask");
|
||||
bzero(&aip->a_mask, sizeof(aip->a_mask));
|
||||
if (aip->a_addr.ss_family == AF_INET)
|
||||
in_len2mask((struct in_addr *)&((struct sockaddr *)&aip->a_mask)->sa_data, ul);
|
||||
else if (aip->a_addr.ss_family == AF_INET6)
|
||||
in6_prefixlen2mask((struct in6_addr *)&((struct sockaddr *)&aip->a_mask)->sa_data, ul);
|
||||
else
|
||||
errx(1, "invalid address family %d\n", aip->a_addr.ss_family);
|
||||
allowed_ips_count++;
|
||||
if (allowed_ips_count > 1)
|
||||
nvlist_free_binary(nvl_params, "allowed-ips");
|
||||
nvlist_add_binary(nvl_params, "allowed-ips", allowed_ips,
|
||||
allowed_ips_count*sizeof(*aip));
|
||||
|
||||
dump_peer(nvl_params);
|
||||
free(base);
|
||||
}
|
||||
|
||||
static
|
||||
DECL_CMD_FUNC(setendpoint, val, d)
|
||||
{
|
||||
if (!do_peer)
|
||||
errx(1, "setting endpoint only valid when adding peer");
|
||||
parse_endpoint(val);
|
||||
}
|
||||
|
||||
static void
|
||||
wireguard_status(int s)
|
||||
{
|
||||
size_t size;
|
||||
void *packed;
|
||||
nvlist_t *nvl;
|
||||
char buf[WG_KEY_LEN_BASE64];
|
||||
const void *key;
|
||||
uint16_t listen_port;
|
||||
|
||||
if (get_nvl_out_size(s, WGC_GET, &size))
|
||||
return;
|
||||
if ((packed = malloc(size)) == NULL)
|
||||
return;
|
||||
if (do_cmd(s, WGC_GET, packed, size, 0))
|
||||
return;
|
||||
nvl = nvlist_unpack(packed, size, 0);
|
||||
if (nvlist_exists_number(nvl, "listen-port")) {
|
||||
listen_port = nvlist_get_number(nvl, "listen-port");
|
||||
printf("\tlisten-port: %d\n", listen_port);
|
||||
}
|
||||
if (nvlist_exists_binary(nvl, "private-key")) {
|
||||
key = nvlist_get_binary(nvl, "private-key", &size);
|
||||
b64_ntop((const uint8_t *)key, size, buf, WG_MAX_STRLEN);
|
||||
printf("\tprivate-key: %s\n", buf);
|
||||
}
|
||||
if (nvlist_exists_binary(nvl, "public-key")) {
|
||||
key = nvlist_get_binary(nvl, "public-key", &size);
|
||||
b64_ntop((const uint8_t *)key, size, buf, WG_MAX_STRLEN);
|
||||
printf("\tpublic-key: %s\n", buf);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Commands provided by this file.  Every element is handed to
 * cmd_register() by wireguard_ctor(); the string is the command word and
 * the function is the handler defined earlier in this file.
 * NOTE(review): the DEF_* macros come from ifconfig.h - the difference
 * between the CLONE and non-CLONE variants is not visible here.
 */
static struct cmd wireguard_cmds[] = {
|
||||
DEF_CLONE_CMD_ARG("listen-port", setwglistenport),
|
||||
DEF_CLONE_CMD_ARG("private-key", setwgprivkey),
|
||||
DEF_CMD("peer-list", 0, peerlist),
|
||||
DEF_CMD("peer", 0, peerstart),
|
||||
DEF_CMD_ARG("public-key", setwgpubkey),
|
||||
DEF_CMD_ARG("allowed-ips", setallowedips),
|
||||
DEF_CMD_ARG("endpoint", setendpoint),
|
||||
};
|
||||
|
||||
/*
 * Address family switch entry registered through af_register() by
 * wireguard_ctor().  It is not bound to a particular address family
 * (AF_UNSPEC) and only supplies wireguard_status() as its status hook.
 */
static struct afswtch af_wireguard = {
|
||||
.af_name = "af_wireguard",
|
||||
.af_af = AF_UNSPEC,
|
||||
.af_other_status = wireguard_status,
|
||||
};
|
||||
|
||||
static void
|
||||
wg_create(int s, struct ifreq *ifr)
|
||||
{
|
||||
struct iovec iov;
|
||||
void *packed;
|
||||
size_t size;
|
||||
|
||||
setproctitle("ifconfig %s create ...\n", name);
|
||||
if (!nvlist_exists_number(nvl_params, "listen-port"))
|
||||
goto legacy;
|
||||
if (!nvlist_exists_binary(nvl_params, "private-key"))
|
||||
goto legacy;
|
||||
|
||||
packed = nvlist_pack(nvl_params, &size);
|
||||
if (packed == NULL)
|
||||
errx(1, "failed to setup create request");
|
||||
iov.iov_len = size;
|
||||
iov.iov_base = packed;
|
||||
ifr->ifr_data = (caddr_t)&iov;
|
||||
if (ioctl(s, SIOCIFCREATE2, ifr) < 0)
|
||||
err(1, "SIOCIFCREATE2");
|
||||
return;
|
||||
legacy:
|
||||
ifr->ifr_data == NULL;
|
||||
if (ioctl(s, SIOCIFCREATE, ifr) < 0)
|
||||
err(1, "SIOCIFCREATE");
|
||||
}
|
||||
|
||||
static __constructor void
|
||||
wireguard_ctor(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
nvl_params = nvlist_create(0);
|
||||
for (i = 0; i < nitems(wireguard_cmds); i++)
|
||||
cmd_register(&wireguard_cmds[i]);
|
||||
af_register(&af_wireguard);
|
||||
clone_setdefcallback_prefix("wg", wg_create);
|
||||
}
|
||||
|
||||
#endif
|
56
sys/dev/if_wg/include/crypto/blake2s.h
Normal file
56
sys/dev/if_wg/include/crypto/blake2s.h
Normal file
@ -0,0 +1,56 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#ifndef _BLAKE2S_H_
|
||||
#define _BLAKE2S_H_
|
||||
|
||||
|
||||
enum blake2s_lengths {
|
||||
BLAKE2S_BLOCK_SIZE = 64,
|
||||
BLAKE2S_HASH_SIZE = 32,
|
||||
BLAKE2S_KEY_SIZE = 32
|
||||
};
|
||||
|
||||
struct blake2s_state {
|
||||
uint32_t h[8];
|
||||
uint32_t t[2];
|
||||
uint32_t f[2];
|
||||
uint8_t buf[BLAKE2S_BLOCK_SIZE];
|
||||
size_t buflen;
|
||||
uint8_t last_node;
|
||||
};
|
||||
|
||||
void blake2s_init(struct blake2s_state *state, const size_t outlen);
|
||||
void blake2s_init_key(struct blake2s_state *state, const size_t outlen,
|
||||
const void *key, const size_t keylen);
|
||||
void blake2s_update(struct blake2s_state *state, const uint8_t *in, size_t inlen);
|
||||
void blake2s_final(struct blake2s_state *state, uint8_t *out, const size_t outlen);
|
||||
|
||||
static inline void blake2s(uint8_t *out, const uint8_t *in, const uint8_t *key,
|
||||
const size_t outlen, const size_t inlen,
|
||||
const size_t keylen)
|
||||
{
|
||||
struct blake2s_state state;
|
||||
#ifdef __linux___
|
||||
WARN_ON(IS_ENABLED(DEBUG) && ((!in && inlen > 0) || !out || !outlen ||
|
||||
outlen > BLAKE2S_HASH_SIZE || keylen > BLAKE2S_KEY_SIZE ||
|
||||
(!key && keylen)));
|
||||
#endif
|
||||
|
||||
if (keylen)
|
||||
blake2s_init_key(&state, outlen, key, keylen);
|
||||
else
|
||||
blake2s_init(&state, outlen);
|
||||
|
||||
blake2s_update(&state, in, inlen);
|
||||
blake2s_final(&state, out, outlen);
|
||||
}
|
||||
|
||||
void blake2s_hmac(uint8_t *out, const uint8_t *in, const uint8_t *key,
|
||||
const size_t outlen, const size_t inlen, const size_t keylen);
|
||||
|
||||
#endif /* _BLAKE2S_H_ */
|
74
sys/dev/if_wg/include/crypto/curve25519.h
Normal file
74
sys/dev/if_wg/include/crypto/curve25519.h
Normal file
@ -0,0 +1,74 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
|
||||
*
|
||||
* Copyright (c) 2019-2020 Rubicon Communications, LLC (Netgate)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _CURVE25519_H_
|
||||
#define _CURVE25519_H_
|
||||
|
||||
#include <sys/systm.h>
|
||||
|
||||
#define CURVE25519_KEY_SIZE 32
|
||||
|
||||
void curve25519_generic(u8 [CURVE25519_KEY_SIZE],
|
||||
const u8 [CURVE25519_KEY_SIZE],
|
||||
const u8 [CURVE25519_KEY_SIZE]);
|
||||
|
||||
/*
 * Clamp a secret key in place: clear the three low bits of the first byte,
 * clear the top bit of the last byte and set the bit below it.
 */
static inline void curve25519_clamp_secret(u8 secret[CURVE25519_KEY_SIZE])
{
	u8 *const first = &secret[0];
	u8 *const last = &secret[31];

	*first &= 0xf8;
	*last &= 0x7f;
	*last |= 0x40;
}
|
||||
|
||||
static const u8 null_point[CURVE25519_KEY_SIZE] = { 0 };
|
||||
|
||||
static inline int curve25519(u8 mypublic[CURVE25519_KEY_SIZE],
|
||||
const u8 secret[CURVE25519_KEY_SIZE],
|
||||
const u8 basepoint[CURVE25519_KEY_SIZE])
|
||||
{
|
||||
curve25519_generic(mypublic, secret, basepoint);
|
||||
return timingsafe_bcmp(mypublic, null_point, CURVE25519_KEY_SIZE);
|
||||
}
|
||||
|
||||
static inline int curve25519_generate_public(u8 pub[CURVE25519_KEY_SIZE],
|
||||
const u8 secret[CURVE25519_KEY_SIZE])
|
||||
{
|
||||
static const u8 basepoint[CURVE25519_KEY_SIZE] __aligned(32) = { 9 };
|
||||
|
||||
if (timingsafe_bcmp(secret, null_point, CURVE25519_KEY_SIZE) == 0)
|
||||
return 0;
|
||||
|
||||
return curve25519(pub, secret, basepoint);
|
||||
}
|
||||
|
||||
/*
 * Fill secret with CURVE25519_KEY_SIZE bytes from arc4random_buf() and
 * clamp the result with curve25519_clamp_secret().
 */
static inline void curve25519_generate_secret(u8 secret[CURVE25519_KEY_SIZE])
{
	u8 *const out = secret;

	arc4random_buf(out, CURVE25519_KEY_SIZE);
	curve25519_clamp_secret(out);
}
|
||||
|
||||
#endif /* _CURVE25519_H_ */
|
15
sys/dev/if_wg/include/crypto/zinc.h
Normal file
15
sys/dev/if_wg/include/crypto/zinc.h
Normal file
@ -0,0 +1,15 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _WG_ZINC_H
|
||||
#define _WG_ZINC_H
|
||||
|
||||
int chacha20_mod_init(void);
|
||||
int poly1305_mod_init(void);
|
||||
int chacha20poly1305_mod_init(void);
|
||||
int blake2s_mod_init(void);
|
||||
int curve25519_mod_init(void);
|
||||
|
||||
#endif
|
89
sys/dev/if_wg/include/sys/if_wg_session.h
Normal file
89
sys/dev/if_wg/include/sys/if_wg_session.h
Normal file
@ -0,0 +1,89 @@
|
||||
/*
|
||||
* Copyright (c) 2019 Matt Dunwoodie <ncon@noconroy.net>
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef __IF_WG_H__
|
||||
#define __IF_WG_H__
|
||||
|
||||
#include <net/if.h>
|
||||
#include <netinet/in.h>
|
||||
|
||||
/*
|
||||
* This is the public interface to the WireGuard network interface.
|
||||
*
|
||||
* It is designed to be used by tools such as ifconfig(8) and wg(8).
|
||||
*/
|
||||
|
||||
#define WG_KEY_SIZE 32
|
||||
|
||||
#define WG_DEVICE_HAS_PUBKEY (1 << 0)
|
||||
#define WG_DEVICE_HAS_PRIVKEY (1 << 1)
|
||||
#define WG_DEVICE_HAS_MASKED_PRIVKEY (1 << 2)
|
||||
#define WG_DEVICE_HAS_PORT (1 << 3)
|
||||
#define WG_DEVICE_HAS_RDOMAIN (1 << 4)
|
||||
#define WG_DEVICE_REPLACE_PEERS (1 << 5)
|
||||
|
||||
#define WG_PEER_HAS_PUBKEY (1 << 0)
|
||||
#define WG_PEER_HAS_SHAREDKEY (1 << 1)
|
||||
#define WG_PEER_HAS_MASKED_SHAREDKEY (1 << 2)
|
||||
#define WG_PEER_HAS_ENDPOINT (1 << 3)
|
||||
#define WG_PEER_HAS_PERSISTENTKEEPALIVE (1 << 4)
|
||||
#define WG_PEER_REPLACE_CIDRS (1 << 5)
|
||||
#define WG_PEER_REMOVE (1 << 6)
|
||||
|
||||
#define SIOCSWG _IOWR('i', 200, struct wg_device_io)
|
||||
#define SIOCGWG _IOWR('i', 201, struct wg_device_io)
|
||||
|
||||
#define WG_PEERS_FOREACH(p, d) \
|
||||
for (p = (d)->d_peers; p < (d)->d_peers + (d)->d_num_peers; p++)
|
||||
#define WG_CIDRS_FOREACH(c, p) \
|
||||
for (c = (p)->p_cidrs; c < (p)->p_cidrs + (p)->p_num_cidrs; c++)
|
||||
|
||||
struct wg_allowedip {
|
||||
struct sockaddr_storage a_addr;
|
||||
struct sockaddr_storage a_mask;
|
||||
};
|
||||
|
||||
enum {
|
||||
WG_PEER_CTR_TX_BYTES,
|
||||
WG_PEER_CTR_RX_BYTES,
|
||||
WG_PEER_CTR_NUM,
|
||||
};
|
||||
|
||||
struct wg_device_io {
|
||||
char d_name[IFNAMSIZ];
|
||||
uint8_t d_flags;
|
||||
in_port_t d_port;
|
||||
int d_rdomain;
|
||||
uint8_t d_pubkey[WG_KEY_SIZE];
|
||||
uint8_t d_privkey[WG_KEY_SIZE];
|
||||
size_t d_num_peers;
|
||||
size_t d_num_cidrs;
|
||||
struct wg_peer_io *d_peers;
|
||||
};
|
||||
|
||||
|
||||
#ifndef ENOKEY
|
||||
#define ENOKEY ENOTCAPABLE
|
||||
#endif
|
||||
|
||||
typedef enum {
|
||||
WGC_GET = 0x5,
|
||||
WGC_SET = 0x6,
|
||||
} wg_cmd_t;
|
||||
|
||||
#endif /* __IF_WG_H__ */
|
322
sys/dev/if_wg/include/sys/if_wg_session_vars.h
Normal file
322
sys/dev/if_wg/include/sys/if_wg_session_vars.h
Normal file
@ -0,0 +1,322 @@
|
||||
/*
|
||||
* Copyright (c) 2019 Matt Dunwoodie <ncon@noconroy.net>
|
||||
* Copyright (c) 2019-2020 Rubicon Communications, LLC (Netgate)
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _IF_WG_VARS_H_
|
||||
#define _IF_WG_VARS_H_
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <sys/lock.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <crypto/siphash/siphash.h>
|
||||
|
||||
|
||||
#include <net/if.h>
|
||||
#include <net/if_var.h>
|
||||
#include <net/if_types.h>
|
||||
#include <net/ethernet.h>
|
||||
#include <net/pfvar.h>
|
||||
#include <net/iflib.h>
|
||||
|
||||
#include <sys/wg_noise.h>
|
||||
#include <sys/wg_cookie.h>
|
||||
/* This is only needed for wg_keypair. */
|
||||
#include <sys/if_wg_session.h>
|
||||
|
||||
#define UNIMPLEMENTED() panic("%s not implemented\n", __func__)
|
||||
|
||||
#define WG_KEY_SIZE 32
|
||||
#define WG_MSG_PADDING_SIZE 16
|
||||
|
||||
|
||||
/* Constant for session */
|
||||
#define REKEY_TIMEOUT 5
|
||||
#define REKEY_TIMEOUT_JITTER 500 /* TODO ok? jason */
|
||||
#define REJECT_AFTER_TIME 180
|
||||
#define KEEPALIVE_TIMEOUT 10
|
||||
#define MAX_TIMER_HANDSHAKES (90 / REKEY_TIMEOUT)
|
||||
#define NEW_HANDSHAKE_TIMEOUT (REKEY_TIMEOUT + KEEPALIVE_TIMEOUT)
|
||||
|
||||
#define MAX_QUEUED_INCOMING_HANDSHAKES 4096 /* TODO: replace this with DQL */
|
||||
#define MAX_QUEUED_PACKETS 1024 /* TODO: replace this with DQL */
|
||||
|
||||
#define HASHTABLE_PEER_SIZE (1 << 6) //1 << 11
|
||||
#define HASHTABLE_INDEX_SIZE (HASHTABLE_PEER_SIZE * 3) //1 << 13
|
||||
|
||||
#define PEER_MAGIC1 0xCAFEBABEB00FDADDULL
|
||||
#define PEER_MAGIC2 0xCAAFD0D0D00DBABEULL
|
||||
#define PEER_MAGIC3 0xD00DBABEF00DFADEULL
|
||||
|
||||
|
||||
enum message_type {
|
||||
MESSAGE_INVALID = 0,
|
||||
MESSAGE_HANDSHAKE_INITIATION = 1,
|
||||
MESSAGE_HANDSHAKE_RESPONSE = 2,
|
||||
MESSAGE_HANDSHAKE_COOKIE = 3,
|
||||
MESSAGE_DATA = 4
|
||||
};
|
||||
|
||||
struct wg_softc;
|
||||
|
||||
#if __FreeBSD_version > 1300000
|
||||
typedef void timeout_t (void *);
|
||||
#endif
|
||||
|
||||
/* Socket */
|
||||
/*
 * Addressing information for one endpoint.
 *
 * e_remote overlays three views of the same storage: a generic sockaddr, an
 * IPv4 sockaddr_in and an IPv6 sockaddr_in6.
 * e_local overlays an IPv4 address with an IPv6 in6_pktinfo.
 * Judging by the member and union names these are the remote address and the
 * local source address respectively.
 */
struct wg_endpoint {
|
||||
union wg_remote {
|
||||
struct sockaddr r_sa;
|
||||
struct sockaddr_in r_sin;
|
||||
struct sockaddr_in6 r_sin6;
|
||||
} e_remote;
|
||||
union wg_source {
|
||||
struct in_addr l_in;
|
||||
struct in6_pktinfo l_pktinfo6;
|
||||
/*
 * Shorthand for the IPv6 address inside l_pktinfo6. Being a preprocessor
 * macro it is not scoped to this union: every later use of the token l_in6
 * in a file that includes this header is rewritten.
 */
#define l_in6 l_pktinfo6.ipi6_addr
|
||||
} e_local;
|
||||
};
|
||||
|
||||
struct wg_socket {
|
||||
struct mtx so_mtx;
|
||||
in_port_t so_port;
|
||||
struct socket *so_so4;
|
||||
struct socket *so_so6;
|
||||
};
|
||||
|
||||
struct wg_queue {
|
||||
struct mtx q_mtx;
|
||||
struct mbufq q;
|
||||
};
|
||||
|
||||
struct wg_index {
|
||||
LIST_ENTRY(wg_index) i_entry;
|
||||
SLIST_ENTRY(wg_index) i_unused_entry;
|
||||
uint32_t i_key;
|
||||
struct noise_remote *i_value;
|
||||
};
|
||||
|
||||
/*
 * Timer state; struct wg_peer embeds one instance as p_timers.
 *
 * Holds a disable flag protected by t_lock, keepalive bookkeeping, five
 * callouts (new handshake, send keepalive, retry handshake, zero key
 * material, persistent keepalive) and handshake timestamps plus a retry
 * counter.
 */
struct wg_timers {
|
||||
/* t_lock is for blocking wg_timers_event_* when setting t_disabled. */
|
||||
struct rwlock t_lock;
|
||||
|
||||
int t_disabled;
|
||||
int t_need_another_keepalive;
|
||||
/* Read and written through wg_timers_{get,set}_persistent_keepalive(). */
uint16_t t_persistent_keepalive_interval;
|
||||
struct callout t_new_handshake;
|
||||
struct callout t_send_keepalive;
|
||||
struct callout t_retry_handshake;
|
||||
struct callout t_zero_key_material;
|
||||
struct callout t_persistent_keepalive;
|
||||
|
||||
/* NOTE(review): presumably guards the t_handshake_* fields below; confirm. */
struct mtx t_handshake_mtx;
|
||||
struct timespec t_handshake_last_sent;
|
||||
struct timespec t_handshake_complete;
|
||||
volatile int t_handshake_retries;
|
||||
|
||||
};
|
||||
|
||||
struct wg_peer {
|
||||
uint64_t p_magic_1;
|
||||
CK_LIST_ENTRY(wg_peer) p_hash_entry;
|
||||
CK_LIST_ENTRY(wg_peer) p_entry;
|
||||
uint64_t p_id;
|
||||
struct wg_softc *p_sc;
|
||||
|
||||
struct noise_remote p_remote;
|
||||
struct cookie_maker p_cookie;
|
||||
struct wg_timers p_timers;
|
||||
|
||||
struct rwlock p_endpoint_lock;
|
||||
struct wg_endpoint p_endpoint;
|
||||
|
||||
uint64_t p_magic_2;
|
||||
|
||||
SLIST_HEAD(,wg_index) p_unused_index;
|
||||
struct wg_index p_index[3];
|
||||
|
||||
struct wg_queue p_encap_queue;
|
||||
struct wg_queue p_decap_queue;
|
||||
|
||||
struct grouptask p_clear_secrets;
|
||||
struct grouptask p_send_initiation;
|
||||
struct grouptask p_send_keepalive;
|
||||
struct grouptask p_send;
|
||||
struct grouptask p_recv;
|
||||
|
||||
counter_u64_t p_tx_bytes;
|
||||
counter_u64_t p_rx_bytes;
|
||||
|
||||
CK_LIST_HEAD(, wg_route) p_routes;
|
||||
uint64_t p_magic_3;
|
||||
struct mtx p_lock;
|
||||
struct epoch_context p_ctx;
|
||||
};
|
||||
|
||||
|
||||
|
||||
/* Packet */
|
||||
|
||||
void wg_softc_decrypt(struct wg_softc *);
|
||||
void wg_softc_encrypt(struct wg_softc *);
|
||||
|
||||
/* Queue */
|
||||
void wg_queue_init(struct wg_queue *, const char *);
|
||||
void wg_queue_deinit(struct wg_queue *);
|
||||
|
||||
/* Counter */
|
||||
|
||||
/* Timers */
|
||||
|
||||
/* Route */
|
||||
enum route_direction {
|
||||
IN,
|
||||
OUT,
|
||||
};
|
||||
|
||||
struct wg_route_table {
|
||||
size_t t_count;
|
||||
struct radix_node_head *t_ip;
|
||||
struct radix_node_head *t_ip6;
|
||||
};
|
||||
struct wg_peer;
|
||||
|
||||
struct wg_route {
|
||||
struct radix_node r_nodes[2];
|
||||
struct wg_allowedip r_cidr;
|
||||
CK_LIST_ENTRY(wg_route) r_entry;
|
||||
struct wg_peer *r_peer;
|
||||
};
|
||||
|
||||
|
||||
int wg_route_add(struct wg_route_table *, struct wg_peer *,
|
||||
const struct wg_allowedip *);
|
||||
int wg_route_delete(struct wg_route_table *, struct wg_peer *);
|
||||
|
||||
/* Noise */
|
||||
|
||||
/*
|
||||
* Peer
|
||||
*
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
struct wg_softc;
|
||||
|
||||
struct wg_hashtable {
|
||||
struct mtx h_mtx;
|
||||
SIPHASH_KEY h_secret;
|
||||
CK_LIST_HEAD(, wg_peer) h_peers_list;
|
||||
CK_LIST_HEAD(, wg_peer) *h_peers;
|
||||
u_long h_peers_mask;
|
||||
size_t h_num_peers;
|
||||
LIST_HEAD(, noise_keypair) *h_keys;
|
||||
u_long h_keys_mask;
|
||||
size_t h_num_keys;
|
||||
};
|
||||
|
||||
/* Softc */
|
||||
struct wg_softc {
|
||||
if_softc_ctx_t shared;
|
||||
if_ctx_t wg_ctx;
|
||||
struct ifnet *sc_ifp;
|
||||
uint16_t sc_incoming_port;
|
||||
uint32_t sc_user_cookie;
|
||||
|
||||
struct wg_socket sc_socket;
|
||||
struct wg_hashtable sc_hashtable;
|
||||
struct wg_route_table sc_routes;
|
||||
|
||||
struct mbufq sc_handshake_queue;
|
||||
struct grouptask sc_handshake;
|
||||
|
||||
struct noise_local sc_local;
|
||||
struct cookie_checker sc_cookie;
|
||||
|
||||
struct buf_ring *sc_encap_ring;
|
||||
struct buf_ring *sc_decap_ring;
|
||||
|
||||
struct grouptask *sc_encrypt;
|
||||
struct grouptask *sc_decrypt;
|
||||
|
||||
struct rwlock sc_index_lock;
|
||||
LIST_HEAD(,wg_index) *sc_index;
|
||||
u_long sc_index_mask;
|
||||
|
||||
struct mtx sc_mtx;
|
||||
};
|
||||
|
||||
/*
 * Per-packet tag, retrieved from an mbuf with wg_tag_get().
 *
 * The structure begins with a struct m_tag header (wt_tag) followed by the
 * fields carried with the packet: an endpoint, a peer pointer, an mbuf
 * pointer, an address family, a done flag and an MTU value.
 * NOTE(review): wt_tag being the first member presumably lets the whole
 * structure be handled as an m_tag; confirm against the allocation site.
 */
struct wg_tag {
|
||||
struct m_tag wt_tag;
|
||||
struct wg_endpoint t_endpoint;
|
||||
struct wg_peer *t_peer;
|
||||
struct mbuf *t_mbuf;
|
||||
sa_family_t t_family;
|
||||
int t_done;
|
||||
int t_mtu;
|
||||
};
|
||||
|
||||
int wg_route_add(struct wg_route_table *tbl, struct wg_peer *peer,
|
||||
const struct wg_allowedip *cidr_);
|
||||
|
||||
struct wg_peer *wg_route_lookup(struct wg_route_table *, struct mbuf *,
|
||||
enum route_direction);
|
||||
|
||||
void wg_peer_remove_all(struct wg_softc *);
|
||||
struct wg_peer *wg_peer_alloc(struct wg_softc *);
|
||||
void wg_peer_destroy(struct wg_peer *);
|
||||
|
||||
void wg_hashtable_init(struct wg_hashtable *);
|
||||
void wg_hashtable_destroy(struct wg_hashtable *);
|
||||
void wg_hashtable_peer_insert(struct wg_hashtable *, struct wg_peer *);
|
||||
struct wg_peer *wg_peer_lookup(struct wg_softc *,
|
||||
const uint8_t [WG_KEY_SIZE]);
|
||||
void wg_hashtable_peer_remove(struct wg_hashtable *, struct wg_peer *);
|
||||
|
||||
|
||||
int wg_queue_out(struct wg_peer *peer, struct mbuf *m);
|
||||
|
||||
int wg_route_init(struct wg_route_table *);
|
||||
void wg_route_destroy(struct wg_route_table *);
|
||||
|
||||
int wg_socket_init(struct wg_softc *sc);
|
||||
void wg_socket_reinit(struct wg_softc *, struct socket *so4,
|
||||
struct socket *so6);
|
||||
int wg_socket_close(struct wg_socket *so);
|
||||
|
||||
void wg_softc_handshake_receive(struct wg_softc *sc);
|
||||
|
||||
int wg_timers_get_persistent_keepalive(struct wg_timers *, uint16_t *);
|
||||
void wg_timers_set_persistent_keepalive(struct wg_timers *t, uint16_t);
|
||||
void wg_timers_get_last_handshake(struct wg_timers *, struct timespec *);
|
||||
|
||||
|
||||
struct noise_remote *wg_remote_get(struct wg_softc *, uint8_t [NOISE_KEY_SIZE]);
|
||||
uint32_t wg_index_set(struct wg_softc *, struct noise_remote *);
|
||||
struct noise_remote *wg_index_get(struct wg_softc *, uint32_t);
|
||||
void wg_index_drop(struct wg_softc *, uint32_t);
|
||||
void wg_encrypt_dispatch(struct wg_softc *);
|
||||
void wg_decrypt_dispatch(struct wg_softc *);
|
||||
|
||||
struct wg_tag *wg_tag_get(struct mbuf *m);
|
||||
|
||||
|
||||
#endif /* _IF_WG_VARS_H_ */
|
74
sys/dev/if_wg/include/sys/simd-x86_64.h
Normal file
74
sys/dev/if_wg/include/sys/simd-x86_64.h
Normal file
@ -0,0 +1,74 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
|
||||
*
|
||||
* Copyright (c) 2019-2020 Rubicon Communications, LLC (Netgate)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _SIMD_X86_64_H_
|
||||
#define _SIMD_X86_64_H_
|
||||
|
||||
|
||||
#include <x86/x86_var.h>
|
||||
#include <x86/specialreg.h>
|
||||
|
||||
/*
 * Execute the XGETBV instruction and return its 64-bit result.
 *
 * `index` is passed in ECX; the instruction's EDX:EAX output is combined
 * into one value with EDX as the upper 32 bits.
 */
static inline uint64_t
|
||||
xgetbv(uint32_t index)
|
||||
{
|
||||
uint32_t eax, edx;
|
||||
/* xgetbv - instruction byte code */
|
||||
/* The instruction is emitted as raw opcode bytes (0f 01 d0) rather than by mnemonic. */
__asm__ __volatile__(".byte 0x0f; .byte 0x01; .byte 0xd0"
|
||||
: "=a" (eax), "=d" (edx)
|
||||
: "c" (index));
|
||||
|
||||
return ((((uint64_t)edx)<<32) | (uint64_t)eax);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Detect register set support
|
||||
*/
|
||||
static inline boolean_t
|
||||
__simd_state_enabled(const uint64_t state)
|
||||
{
|
||||
boolean_t has_osxsave;
|
||||
uint64_t xcr0;
|
||||
|
||||
has_osxsave = !!(cpu_feature2 & CPUID2_OSXSAVE);
|
||||
|
||||
if (!has_osxsave)
|
||||
return (0);
|
||||
|
||||
xcr0 = xgetbv(0);
|
||||
return ((xcr0 & state) == state);
|
||||
}
|
||||
|
||||
#define _XSTATE_SSE_AVX (0x2 | 0x4)
|
||||
#define _XSTATE_AVX512 (0xE0 | _XSTATE_SSE_AVX)
|
||||
|
||||
#define __ymm_enabled() __simd_state_enabled(_XSTATE_SSE_AVX)
|
||||
#define __zmm_enabled() __simd_state_enabled(_XSTATE_AVX512)
|
||||
#endif
|
||||
|
342
sys/dev/if_wg/include/sys/support.h
Normal file
342
sys/dev/if_wg/include/sys/support.h
Normal file
@ -0,0 +1,342 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
|
||||
*
|
||||
* Copyright (c) 2019-2020 Rubicon Communications, LLC (Netgate)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef SYS_SUPPORT_H_
|
||||
#define SYS_SUPPORT_H_
|
||||
#ifdef __LOCORE
|
||||
#include <machine/asm.h>
|
||||
#define SYM_FUNC_START ENTRY
|
||||
#define SYM_FUNC_END END
|
||||
|
||||
#else
|
||||
#include <sys/types.h>
|
||||
#include <sys/limits.h>
|
||||
#include <sys/endian.h>
|
||||
#include <sys/libkern.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/lock.h>
|
||||
#include <vm/uma.h>
|
||||
|
||||
#if defined(__aarch64__) || defined(__amd64__) || defined(__i386__)
|
||||
#include <machine/fpu.h>
|
||||
#endif
|
||||
#include <crypto/siphash/siphash.h>
|
||||
|
||||
|
||||
#define COMPAT_ZINC_IS_A_MODULE
|
||||
MALLOC_DECLARE(M_WG);
|
||||
|
||||
#define BUILD_BUG_ON(x) CTASSERT(!(x))
|
||||
|
||||
#define BIT(nr) (1UL << (nr))
|
||||
#define BIT_ULL(nr) (1ULL << (nr))
|
||||
#ifdef __LP64__
|
||||
#define BITS_PER_LONG 64
|
||||
#else
|
||||
#define BITS_PER_LONG 32
|
||||
#endif
|
||||
|
||||
#define rw_enter_write rw_wlock
|
||||
#define rw_exit_write rw_wunlock
|
||||
#define rw_enter_read rw_rlock
|
||||
#define rw_exit_read rw_runlock
|
||||
#define rw_exit rw_unlock
|
||||
|
||||
#define ASSERT(x) MPASS(x)
|
||||
|
||||
#define ___PASTE(a,b) a##b
|
||||
#define __PASTE(a,b) ___PASTE(a,b)
|
||||
#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
|
||||
|
||||
#define typeof(x) __typeof__(x)
|
||||
|
||||
|
||||
#define min_t(t, a, b) ({ t __a = (a); t __b = (b); __a > __b ? __b : __a; })
|
||||
|
||||
typedef uint8_t u8;
|
||||
typedef uint16_t u16;
|
||||
typedef uint32_t u32;
|
||||
typedef uint32_t __le32;
|
||||
typedef uint64_t u64;
|
||||
typedef uint64_t __le64;
|
||||
|
||||
#define __must_check __attribute__((__warn_unused_result__))
|
||||
#define asmlinkage
|
||||
#define __ro_after_init __read_mostly
|
||||
|
||||
#define get_unaligned_le32(x) le32dec(x)
|
||||
#define get_unaligned_le64(x) le64dec(x)
|
||||
|
||||
#define cpu_to_le64(x) htole64(x)
|
||||
#define cpu_to_le32(x) htole32(x)
|
||||
#define letoh64(x) le64toh(x)
|
||||
|
||||
#define need_resched() \
|
||||
((curthread->td_flags & (TDF_NEEDRESCHED|TDF_ASTPENDING)) || \
|
||||
curthread->td_owepreempt)
|
||||
|
||||
|
||||
#define CONTAINER_OF(a, b, c) __containerof((a), b, c)
|
||||
|
||||
typedef struct {
|
||||
uint64_t k0;
|
||||
uint64_t k1;
|
||||
} SIPHASH_KEY;
|
||||
|
||||
static inline uint64_t
|
||||
siphash24(const SIPHASH_KEY *key, const void *src, size_t len)
|
||||
{
|
||||
SIPHASH_CTX ctx;
|
||||
|
||||
return (SipHashX(&ctx, 2, 4, (const uint8_t *)key, src, len));
|
||||
}
|
||||
|
||||
static inline void
|
||||
put_unaligned_le32(u32 val, void *p)
|
||||
{
|
||||
*((__le32 *)p) = cpu_to_le32(val);
|
||||
}
|
||||
|
||||
|
||||
/*
 * Rotate the 32-bit value `i32` left by `n` bits.
 *
 * Both shift counts are reduced modulo 32, so n == 0 (or any multiple of 32)
 * yields `i32` unchanged instead of shifting by the full type width, which C
 * leaves undefined. `i32` is evaluated twice and is expected to be an
 * unsigned 32-bit value.
 */
#define rol32(i32, n) (((i32) << ((n) & 31)) | ((i32) >> (-(n) & 31)))
|
||||
|
||||
#define memzero_explicit(p, s) explicit_bzero(p, s)
|
||||
|
||||
#define EXPORT_SYMBOL(x)
|
||||
|
||||
#define U32_MAX ((u32)~0U)
|
||||
#if defined(__aarch64__) || defined(__amd64__) || defined(__i386__)
/*
 * kfpu_begin(ctx): allocate ctx->sc_fpu_ctx on first use, enter a critical
 * section and call fpu_kern_enter() for the current thread.
 *
 * kfpu_end(ctx): assert that the FPU context exists, call fpu_kern_leave()
 * and leave the critical section.
 *
 * Both are wrapped in do { } while (0) so that they behave as one statement
 * (safe inside an unbraced if/else), and `ctx` is parenthesized so that any
 * pointer expression can be passed.
 */
#define kfpu_begin(ctx) do {						\
	if ((ctx)->sc_fpu_ctx == NULL) {				\
		(ctx)->sc_fpu_ctx = fpu_kern_alloc_ctx(0);		\
	}								\
	critical_enter();						\
	fpu_kern_enter(curthread, (ctx)->sc_fpu_ctx, FPU_KERN_NORMAL);	\
} while (0)

#define kfpu_end(ctx) do {						\
	MPASS((ctx)->sc_fpu_ctx != NULL);				\
	fpu_kern_leave(curthread, (ctx)->sc_fpu_ctx);			\
	critical_exit();						\
} while (0)
#else
/* No kernel FPU support on this architecture: the hooks are no-ops. */
#define kfpu_begin(ctx)		do { } while (0)
#define kfpu_end(ctx)		do { } while (0)
#define fpu_kern_free_ctx(p)
#endif
|
||||
|
||||
/*
 * Flag values stored in simd_context_t.sc_state.
 *
 * simd_get() sets either HAVE_FULL_SIMD or HAVE_NO_SIMD; simd_use() ORs in
 * HAVE_SIMD_IN_USE after kfpu_begin(); simd_put() resets the state to
 * HAVE_NO_SIMD.
 *
 * NOTE(review): `1 << 31` shifts a signed int into its sign bit, which ISO C
 * leaves undefined; compilers commonly produce INT_MIN here. Confirm before
 * changing the value, since it is tested with `&` against sc_state.
 */
typedef enum {
|
||||
HAVE_NO_SIMD = 1 << 0,
|
||||
HAVE_FULL_SIMD = 1 << 1,
|
||||
HAVE_SIMD_IN_USE = 1 << 31
|
||||
} simd_context_state_t;
|
||||
|
||||
typedef struct {
|
||||
simd_context_state_t sc_state;
|
||||
struct fpu_kern_ctx *sc_fpu_ctx;
|
||||
} simd_context_t;
|
||||
|
||||
|
||||
#define DONT_USE_SIMD NULL
|
||||
|
||||
/*
 * Compile-time SIMD policy: true when built for amd64, false on every other
 * architecture.
 */
static __must_check inline bool
may_use_simd(void)
{
#if defined(__amd64__)
	const bool simd_allowed = true;
#else
	const bool simd_allowed = false;
#endif

	return simd_allowed;
}
|
||||
|
||||
static inline void
|
||||
simd_get(simd_context_t *ctx)
|
||||
{
|
||||
ctx->sc_state = may_use_simd() ? HAVE_FULL_SIMD : HAVE_NO_SIMD;
|
||||
}
|
||||
|
||||
static inline void
|
||||
simd_put(simd_context_t *ctx)
|
||||
{
|
||||
#if defined(__aarch64__) || defined(__amd64__) || defined(__i386__)
|
||||
if (is_fpu_kern_thread(0))
|
||||
return;
|
||||
#endif
|
||||
if (ctx->sc_state & HAVE_SIMD_IN_USE)
|
||||
kfpu_end(ctx);
|
||||
ctx->sc_state = HAVE_NO_SIMD;
|
||||
}
|
||||
|
||||
static __must_check inline bool
|
||||
simd_use(simd_context_t *ctx)
|
||||
{
|
||||
#if defined(__aarch64__) || defined(__amd64__) || defined(__i386__)
|
||||
if (is_fpu_kern_thread(0))
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
if (ctx == NULL)
|
||||
return false;
|
||||
if (!(ctx->sc_state & HAVE_FULL_SIMD))
|
||||
return false;
|
||||
if (ctx->sc_state & HAVE_SIMD_IN_USE)
|
||||
return true;
|
||||
kfpu_begin(ctx);
|
||||
ctx->sc_state |= HAVE_SIMD_IN_USE;
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
simd_relax(simd_context_t *ctx)
|
||||
{
|
||||
if ((ctx->sc_state & HAVE_SIMD_IN_USE) && need_resched()) {
|
||||
simd_put(ctx);
|
||||
simd_get(ctx);
|
||||
return simd_use(ctx);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
#define unlikely(x) __predict_false(x)
|
||||
#define likely(x) __predict_true(x)
|
||||
/*
 * Generic path for arbitrary size.
 *
 * Compare `size` bytes at `a` and `b` without an early exit: the XOR of each
 * pair is OR-ed into an accumulator and a compiler barrier follows each step.
 * Returns 0 when the buffers are equal and a non-zero value otherwise.
 *
 * Whole `unsigned long` words are compared only when both pointers are
 * suitably aligned, so no misaligned word load is issued; otherwise (and for
 * the tail) the comparison proceeds byte by byte. The choice depends on the
 * addresses only, never on the buffer contents.
 */
static inline unsigned long
__crypto_memneq_generic(const void *a, const void *b, size_t size)
{
	const unsigned char *pa = a;
	const unsigned char *pb = b;
	unsigned long neq = 0;

	if ((((uintptr_t)pa | (uintptr_t)pb) % sizeof(unsigned long)) == 0) {
		while (size >= sizeof(unsigned long)) {
			neq |= *(const unsigned long *)(const void *)pa ^
			    *(const unsigned long *)(const void *)pb;
			__compiler_membar();
			pa += sizeof(unsigned long);
			pb += sizeof(unsigned long);
			size -= sizeof(unsigned long);
		}
	}
	while (size > 0) {
		neq |= (unsigned long)(*pa ^ *pb);
		__compiler_membar();
		pa++;
		pb++;
		size--;
	}
	return neq;
}
|
||||
|
||||
#define crypto_memneq(a, b, c) __crypto_memneq_generic((a), (b), (c))
|
||||
|
||||
/*
 * Convert the 32-bit word at `buf` from host to little-endian byte order in
 * place, using htole32().
 */
static inline void
__cpu_to_le32s(uint32_t *buf)
{
	uint32_t host_word = *buf;

	*buf = htole32(host_word);
}
|
||||
|
||||
/*
 * Convert `words` consecutive 32-bit values starting at `buf` from host to
 * little-endian byte order in place, one element at a time through
 * __cpu_to_le32s(). A count of zero leaves the buffer untouched.
 */
static inline void cpu_to_le32_array(u32 *buf, unsigned int words)
{
	unsigned int idx;

	for (idx = 0; idx < words; idx++)
		__cpu_to_le32s(&buf[idx]);
}
|
||||
|
||||
#define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1
|
||||
void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int len);
|
||||
|
||||
/*
 * Write src1 XOR src2 into dst for `size` bytes.
 *
 * The inline word-at-a-time loop is used only when
 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is non-zero, `size` is a
 * compile-time constant and `size` is a multiple of sizeof(unsigned long).
 * Every other call is forwarded to __crypto_xor().
 */
static inline void crypto_xor_cpy(u8 *dst, const u8 *src1, const u8 *src2,
    unsigned int size)
{
	unsigned long *out;
	const unsigned long *lhs, *rhs;
	unsigned int remaining;

	if (!CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS ||
	    !__builtin_constant_p(size) ||
	    (size % sizeof(unsigned long)) != 0) {
		__crypto_xor(dst, src1, src2, size);
		return;
	}

	out = (unsigned long *)dst;
	lhs = (const unsigned long *)src1;
	rhs = (const unsigned long *)src2;
	for (remaining = size; remaining > 0;
	    remaining -= sizeof(unsigned long))
		*out++ = *lhs++ ^ *rhs++;
}
|
||||
#include <sys/kernel.h>
|
||||
#define module_init(fn) \
|
||||
static void \
|
||||
wrap_ ## fn(void *dummy __unused) \
|
||||
{ \
|
||||
fn(); \
|
||||
} \
|
||||
SYSINIT(zfs_ ## fn, SI_SUB_LAST, SI_ORDER_FIRST, wrap_ ## fn, NULL)
|
||||
|
||||
|
||||
#define module_exit(fn) \
|
||||
static void \
|
||||
wrap_ ## fn(void *dummy __unused) \
|
||||
{ \
|
||||
fn(); \
|
||||
} \
|
||||
SYSUNINIT(zfs_ ## fn, SI_SUB_LAST, SI_ORDER_FIRST, wrap_ ## fn, NULL)
|
||||
|
||||
#define module_param(a, b, c)
|
||||
#define MODULE_LICENSE(x)
|
||||
#define MODULE_DESCRIPTION(x)
|
||||
#define MODULE_AUTHOR(x)
|
||||
|
||||
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
|
||||
|
||||
#define __initconst
|
||||
#define __initdata
|
||||
#define __init
|
||||
#define __exit
|
||||
#define BUG() panic("%s:%d bug hit!\n", __FILE__, __LINE__)
|
||||
|
||||
#define WARN_ON(cond) ({ \
|
||||
bool __ret = (cond); \
|
||||
if (__ret) { \
|
||||
printf("WARNING %s failed at %s:%d\n", \
|
||||
__stringify(cond), __FILE__, __LINE__); \
|
||||
} \
|
||||
unlikely(__ret); \
|
||||
})
|
||||
|
||||
#define pr_err printf
|
||||
#define pr_info printf
|
||||
#define IS_ENABLED(x) 0
|
||||
#define ___stringify(...) #__VA_ARGS__
|
||||
#define __stringify(...) ___stringify(__VA_ARGS__)
|
||||
#define kmalloc(size, flag) malloc((size), M_WG, M_WAITOK)
|
||||
#define kfree(p) free(p, M_WG)
|
||||
#define vzalloc(size) malloc((size), M_WG, M_WAITOK|M_ZERO)
|
||||
#define vfree(p) free(p, M_WG)
|
||||
#endif
|
||||
#endif
|
174
sys/dev/if_wg/include/sys/wg_cookie.h
Normal file
174
sys/dev/if_wg/include/sys/wg_cookie.h
Normal file
@ -0,0 +1,174 @@
|
||||
/*
|
||||
* Copyright (C) 2015-2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
* Copyright (C) 2019-2020 Matt Dunwoodie <ncon@noconroy.net>
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*
|
||||
* ======== wg_cookie.h ========
|
||||
*
|
||||
* This file provides a thread safe interface to the WireGuard cookie
|
||||
* mechanism. It is split into three parts:
|
||||
*
|
||||
* * cookie_maker
|
||||
* Used to create MACs for messages.
|
||||
* * cookie_checker
|
||||
* Used to validate MACs for messages.
|
||||
* * cookie_macs
|
||||
* The MACs that authenticate the message.
|
||||
*
|
||||
* The MACs provide two properties:
|
||||
* * mac1 - That the remote end knows a value.
|
||||
* * mac2 - That the remote end has a specific IP address.
|
||||
*
|
||||
* void cookie_maker_init(cookie_maker, input)
|
||||
* - Initialise cookie_maker, should only be called once and before use.
|
||||
* input is the shared value used for mac1.
|
||||
*
|
||||
* int cookie_checker_init(cookie_checker, zone)
|
||||
* - Initialise cookie_checker, should only be called once and before use. It
|
||||
* will return ENOBUFS if it cannot allocate required memory.
|
||||
*
|
||||
* void cookie_checker_update(cookie_checker, input)
|
||||
* - Set the input value to check mac1 against.
|
||||
*
|
||||
* void cookie_checker_deinit(cookie_checker)
|
||||
* - Destroy all values associated with cookie_checker. cookie_checker must
|
||||
* not be used after calling this function.
|
||||
*
|
||||
* void cookie_checker_create_payload(cookie_checker, cookie_macs, nonce,
|
||||
* payload, sockaddr)
|
||||
* - Create a specific payload derived from the sockaddr. The payload is an
|
||||
* encrypted shared secret that the cookie_maker will decrypt and use to
|
||||
* key the mac2 value.
|
||||
*
|
||||
* int cookie_maker_consume_payload(cookie_maker, nonce, payload)
|
||||
* - Have cookie_maker consume the payload.
|
||||
*
|
||||
* void cookie_maker_mac(cookie_maker, cookie_macs, message, len)
|
||||
* - Create cookie_macs for the message of length len. It will always compute
|
||||
* mac1, however will only compute mac2 if we have recently received a
|
||||
* payload to key it with.
|
||||
*
|
||||
* int cookie_checker_validate_macs(cookie_checker, cookie_macs, message, len,
|
||||
* busy, sockaddr)
|
||||
* - Use cookie_checker to validate the cookie_macs of message with length
|
||||
* len. If busy, then ratelimiting will be applied to the sockaddr.
|
||||
*
|
||||
* ==========================
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef __COOKIE_H__
|
||||
#define __COOKIE_H__
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/rwlock.h>
|
||||
#include <sys/queue.h>
|
||||
#include <sys/support.h>
|
||||
|
||||
#include <netinet/in.h>
|
||||
|
||||
#include <crypto/blake2s.h>
|
||||
|
||||
#define COOKIE_MAC_SIZE 16
|
||||
#define COOKIE_KEY_SIZE 32
|
||||
#define COOKIE_XNONCE_SIZE 24
|
||||
#define COOKIE_COOKIE_SIZE 16
|
||||
#define COOKIE_SECRET_SIZE 32
|
||||
#define COOKIE_INPUT_SIZE 32
|
||||
#define COOKIE_ENCRYPTED_SIZE (COOKIE_COOKIE_SIZE + COOKIE_MAC_SIZE)
|
||||
|
||||
#define COOKIE_MAC1_KEY_LABEL "mac1----"
|
||||
#define COOKIE_COOKIE_KEY_LABEL "cookie--"
|
||||
#define COOKIE_SECRET_MAX_AGE 120
|
||||
#define COOKIE_SECRET_LATENCY 5
|
||||
|
||||
/* Constants for initiation rate limiting */
|
||||
#define RATELIMIT_SIZE (1 << 10)
|
||||
#define RATELIMIT_SIZE_MAX (RATELIMIT_SIZE * 8)
|
||||
#define NSEC_PER_SEC 1000000000LL
|
||||
#define INITIATIONS_PER_SECOND 50
|
||||
#define INITIATIONS_BURSTABLE 10
|
||||
#define INITIATION_COST (NSEC_PER_SEC / INITIATIONS_PER_SECOND)
|
||||
#define TOKEN_MAX (INITIATION_COST * INITIATIONS_BURSTABLE)
|
||||
#define ELEMENT_TIMEOUT 1
|
||||
#define IPV4_MASK_SIZE 4 /* Use all 4 bytes of IPv4 address */
|
||||
#define IPV6_MASK_SIZE 8 /* Use top 8 bytes (/64) of IPv6 address */
|
||||
|
||||
struct cookie_macs {
|
||||
uint8_t mac1[COOKIE_MAC_SIZE];
|
||||
uint8_t mac2[COOKIE_MAC_SIZE];
|
||||
} __packed;
|
||||
|
||||
struct ratelimit_entry {
|
||||
LIST_ENTRY(ratelimit_entry) r_entry;
|
||||
sa_family_t r_af;
|
||||
union {
|
||||
struct in_addr r_in;
|
||||
struct in6_addr r_in6;
|
||||
};
|
||||
struct timespec r_last_time; /* nanouptime */
|
||||
uint64_t r_tokens;
|
||||
};
|
||||
|
||||
struct ratelimit {
|
||||
SIPHASH_KEY rl_secret;
|
||||
uma_zone_t rl_zone;
|
||||
|
||||
struct rwlock rl_lock;
|
||||
LIST_HEAD(, ratelimit_entry) *rl_table;
|
||||
u_long rl_table_mask;
|
||||
size_t rl_table_num;
|
||||
struct timespec rl_last_gc; /* nanouptime */
|
||||
};
|
||||
|
||||
struct cookie_maker {
|
||||
uint8_t cp_mac1_key[COOKIE_KEY_SIZE];
|
||||
uint8_t cp_cookie_key[COOKIE_KEY_SIZE];
|
||||
|
||||
struct rwlock cp_lock;
|
||||
uint8_t cp_cookie[COOKIE_COOKIE_SIZE];
|
||||
struct timespec cp_birthdate; /* nanouptime */
|
||||
int cp_mac1_valid;
|
||||
uint8_t cp_mac1_last[COOKIE_MAC_SIZE];
|
||||
};
|
||||
|
||||
struct cookie_checker {
|
||||
struct ratelimit cc_ratelimit;
|
||||
|
||||
struct rwlock cc_key_lock;
|
||||
uint8_t cc_mac1_key[COOKIE_KEY_SIZE];
|
||||
uint8_t cc_cookie_key[COOKIE_KEY_SIZE];
|
||||
|
||||
struct rwlock cc_secret_lock;
|
||||
struct timespec cc_secret_birthdate; /* nanouptime */
|
||||
uint8_t cc_secret[COOKIE_SECRET_SIZE];
|
||||
};
|
||||
|
||||
void cookie_maker_init(struct cookie_maker *, const uint8_t[COOKIE_INPUT_SIZE]);
|
||||
int cookie_checker_init(struct cookie_checker *, uma_zone_t);
|
||||
void cookie_checker_update(struct cookie_checker *,
|
||||
uint8_t[COOKIE_INPUT_SIZE]);
|
||||
void cookie_checker_deinit(struct cookie_checker *);
|
||||
void cookie_checker_create_payload(struct cookie_checker *,
|
||||
struct cookie_macs *cm, uint8_t[COOKIE_XNONCE_SIZE],
|
||||
uint8_t [COOKIE_ENCRYPTED_SIZE], struct sockaddr *);
|
||||
int cookie_maker_consume_payload(struct cookie_maker *,
|
||||
uint8_t[COOKIE_XNONCE_SIZE], uint8_t[COOKIE_ENCRYPTED_SIZE]);
|
||||
void cookie_maker_mac(struct cookie_maker *, struct cookie_macs *,
|
||||
void *, size_t);
|
||||
int cookie_checker_validate_macs(struct cookie_checker *,
|
||||
struct cookie_macs *, void *, size_t, int, struct sockaddr *);
|
||||
|
||||
#endif /* __COOKIE_H__ */
|
123
sys/dev/if_wg/include/sys/wg_module.h
Normal file
123
sys/dev/if_wg/include/sys/wg_module.h
Normal file
@ -0,0 +1,123 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
|
||||
*
|
||||
* Copyright (c) 2019-2020 Rubicon Communications, LLC (Netgate)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
#ifndef MODULE_H_
|
||||
#define MODULE_H_
|
||||
|
||||
#include <sys/mbuf.h>
|
||||
#include <sys/socket.h>
|
||||
#include <net/if.h>
|
||||
#include <net/if_var.h>
|
||||
#include <sys/support.h>
|
||||
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/epoch.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/mutex.h>
|
||||
|
||||
|
||||
|
||||
#include <crypto/curve25519.h>
|
||||
#include <zinc/chacha20poly1305.h>
|
||||
#include <crypto/blake2s.h>
|
||||
|
||||
MALLOC_DECLARE(M_WG);
|
||||
|
||||
|
||||
enum noise_lengths {
|
||||
NOISE_PUBLIC_KEY_LEN = CURVE25519_KEY_SIZE,
|
||||
NOISE_SYMMETRIC_KEY_LEN = CHACHA20POLY1305_KEY_SIZE,
|
||||
NOISE_TIMESTAMP_LEN = sizeof(uint64_t) + sizeof(uint32_t),
|
||||
NOISE_AUTHTAG_LEN = CHACHA20POLY1305_AUTHTAG_SIZE,
|
||||
NOISE_HASH_LEN = BLAKE2S_HASH_SIZE
|
||||
};
|
||||
|
||||
#define noise_encrypted_len(plain_len) ((plain_len) + NOISE_AUTHTAG_LEN)
|
||||
|
||||
/*
 * Cookie-related constants: secret maximum age (2 * 60) and latency (5),
 * the cookie nonce length (taken from XCHACHA20POLY1305_NONCE_SIZE) and the
 * cookie length (16).
 *
 * NOTE(review): wg_cookie.h #defines COOKIE_SECRET_MAX_AGE and
 * COOKIE_SECRET_LATENCY as macros. If that header is seen before this one,
 * the preprocessor would rewrite these enumerator names into numbers;
 * confirm the two headers are never included together.
 */
enum cookie_values {
|
||||
COOKIE_SECRET_MAX_AGE = 2 * 60,
|
||||
COOKIE_SECRET_LATENCY = 5,
|
||||
COOKIE_NONCE_LEN = XCHACHA20POLY1305_NONCE_SIZE,
|
||||
COOKIE_LEN = 16
|
||||
};
|
||||
|
||||
/*
 * Protocol and queueing limits. MAX_TIMER_HANDSHAKES is derived from
 * REKEY_TIMEOUT (90 / 5).
 *
 * NOTE(review): REKEY_TIMEOUT, KEEPALIVE_TIMEOUT, MAX_TIMER_HANDSHAKES,
 * MAX_QUEUED_INCOMING_HANDSHAKES and MAX_QUEUED_PACKETS are also #defined in
 * if_wg_session_vars.h, and INITIATIONS_PER_SECOND in wg_cookie.h. If one of
 * those headers is seen first, the macros would rewrite the enumerator names
 * below; confirm the headers are never combined in one translation unit.
 */
enum limits {
|
||||
REKEY_TIMEOUT = 5,
|
||||
INITIATIONS_PER_SECOND = 50,
|
||||
MAX_PEERS_PER_DEVICE = 1U << 20,
|
||||
KEEPALIVE_TIMEOUT = 10,
|
||||
MAX_TIMER_HANDSHAKES = 90 / REKEY_TIMEOUT,
|
||||
MAX_QUEUED_INCOMING_HANDSHAKES = 4096, /* TODO: replace this with DQL */
|
||||
MAX_STAGED_PACKETS = 128,
|
||||
MAX_QUEUED_PACKETS = 1024 /* TODO: replace this with DQL */
|
||||
};
|
||||
|
||||
/*
 * Zero the object `addr` points to, then free it with malloc type `type`.
 *
 * The number of bytes wiped is sizeof(*addr), so `addr` must be a typed
 * pointer to a single object (not void * and not an array allocation).
 * `addr` is evaluated exactly once, and a NULL pointer is ignored rather
 * than being handed to explicit_bzero().
 */
#define zfree(addr, type)						\
do {									\
	__typeof__(addr) zfree_ptr_ = (addr);				\
	if (zfree_ptr_ != NULL) {					\
		explicit_bzero(zfree_ptr_, sizeof(*zfree_ptr_));	\
		free(zfree_ptr_, type);					\
	}								\
} while (0)
|
||||
|
||||
struct crypt_queue {
|
||||
union {
|
||||
struct {
|
||||
int last_cpu;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
/*
 * Body of __atomic_load_acq_size(): copy one object of `size` bytes from `p`
 * to `res` with a single volatile load. It relies on the enclosing scope
 * providing `p`, `res` and `size`. Sizes other than 1, 2, 4 and 8 match no
 * case, so `res` is left unwritten for them.
 */
#define __ATOMIC_LOAD_SIZE						\
({									\
	switch (size) {							\
	case 1: *(uint8_t *)res = *(volatile uint8_t *)p; break;	\
	case 2: *(uint16_t *)res = *(volatile uint16_t *)p; break;	\
	case 4: *(uint32_t *)res = *(volatile uint32_t *)p; break;	\
	case 8: *(uint64_t *)res = *(volatile uint64_t *)p; break;	\
	}								\
})

/*
 * Load `size` bytes (1, 2, 4 or 8) from `p` into `res` with acquire
 * semantics.
 *
 * The volatile load alone orders nothing with respect to later ordinary
 * memory accesses, so an acquire fence follows it; this is what the "acq" in
 * the name promises to atomic_load_acq() users.
 */
static inline void
__atomic_load_acq_size(volatile void *p, void *res, int size)
{
	__ATOMIC_LOAD_SIZE;
	atomic_thread_fence_acq();
}
|
||||
|
||||
#define atomic_load_acq(x) \
|
||||
({ \
|
||||
union { __typeof(x) __val; char __c[1]; } __u; \
|
||||
__atomic_load_acq_size(&(x), __u.__c, sizeof(x)); \
|
||||
__u.__val; \
|
||||
})
|
||||
|
||||
|
||||
int wg_ctx_init(void);
|
||||
void wg_ctx_uninit(void);
|
||||
|
||||
|
||||
#endif
|
286
sys/dev/if_wg/include/sys/wg_noise.h
Normal file
286
sys/dev/if_wg/include/sys/wg_noise.h
Normal file
@ -0,0 +1,286 @@
|
||||
/*
|
||||
* Copyright (C) 2015-2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
* Copyright (C) 2019-2020 Matt Dunwoodie <ncon@noconroy.net>
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*
|
||||
* ======== wg_noise.h ========
|
||||
*
|
||||
* This file provides a thread safe interface to the Noise protocol as used in
|
||||
* WireGuard. The three user facing components are:
|
||||
*
|
||||
* * noise_local
|
||||
* Stores the local state for a noise peer.
|
||||
* * noise_remote
|
||||
* Stores the remote state for a noise peer.
|
||||
* * noise_upcall
|
||||
* Stores callback routines for index and peers
|
||||
*
|
||||
* Additionally, a noise_counter, which is invisible to the user, is used to track
|
||||
* message nonces, to prevent message replay.
|
||||
*
|
||||
* This module uses Curve25519 for asymmetric crypto, and ChaCha20Poly1305 for
|
||||
* symmetric crypto. The handshake uses ephemeral keys, which provide perfect
|
||||
* forward secrecy. Keys are NOISE_KEY_SIZE (32) bytes long and can be
|
||||
* generated with a CSRNG. While this module will clamp the key to form a valid
|
||||
* Curve25519 key, it is recommended that keys are stored in Curve25519 form to
|
||||
* preserve interoperability with other systems. Additionally, there is an
|
||||
* optional PresharedKey of length NOISE_PSK_SIZE (also 32 bytes), which when
|
||||
* used, will provide protection against known quantum attacks. Without it,
|
||||
* Curve25519 is broken by Shor's algorithm.
|
||||
*
|
||||
* -------- noise_local --------
|
||||
*
|
||||
* void noise_local_init(noise_local *, noise_upcall *)
|
||||
* - Initialise noise_local, should only be called once and before use.
|
||||
*
|
||||
* int noise_local_set_private(noise_local *, uint8_t *private)
|
||||
* - Set the local private key. This will also calculate the corresponding
|
||||
* public key.
|
||||
*
|
||||
* int noise_local_keys(noise_local *, uint8_t *public, uint8_t *private)
|
||||
* - Get the local keys. It will ensure that a key has been set and if
|
||||
* not, will return ENXIO.
|
||||
*
|
||||
* -------- noise_remote --------
|
||||
*
|
||||
* void noise_remote_init(noise_remote *, const uint8_t *public, noise_local *)
|
||||
* - Initialise noise_remote, should only be called once and before use. Key
|
||||
* must be provided and it cannot be changed once set.
|
||||
*
|
||||
* int noise_remote_set_psk(noise_remote *, const uint8_t *psk)
|
||||
* - Set the shared key. To remove the shared key, set a key of all 0x00.
|
||||
*
|
||||
* int noise_remote_keys(noise_remote *, uint8_t *public, uint8_t *psk)
|
||||
* - Get the remote keys.
|
||||
*
|
||||
* -------- noise_upcall --------
|
||||
*
|
||||
* The noise_upcall struct is used to lookup incoming public keys, as well as
|
||||
* allocate and deallocate index for a remote. The allocation and deallocation
|
||||
* are serialised per noise_remote and guaranteed to only have 3 allocated
|
||||
* indexes at once.
|
||||
*
|
||||
* u_arg - passed to callback functions as void *
|
||||
* u_remote_get - lookup noise_remote based on public key.
|
||||
* u_index_set - allocate index for noise_remote. Any further packets that
|
||||
* arrive with this index should be passed to noise_* functions
|
||||
* with the corresponding noise_remote.
|
||||
* u_index_drop - deallocate index passed to callback.
|
||||
*
|
||||
* -------- crypto --------
|
||||
*
|
||||
* The following functions are used for the crypto side of things:
|
||||
*
|
||||
* int noise_create_initiation(noise_remote *, noise_initiation *)
|
||||
* int noise_consume_initiation(noise_local *, noise_remote **, noise_initiation *)
|
||||
* int noise_create_response(noise_remote *, noise_response *)
|
||||
* int noise_consume_response(noise_remote *, noise_response *)
|
||||
*
|
||||
* int noise_remote_begin_session(noise_remote *)
|
||||
* void noise_remote_clear(noise_remote *)
|
||||
* void noise_remote_expire_current(noise_remote *)
|
||||
* int noise_remote_encrypt(noise_remote *, noise_data *, size_t)
|
||||
* int noise_remote_decrypt(noise_remote *, noise_data *, size_t)
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef __NOISE_H__
|
||||
#define __NOISE_H__
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/rwlock.h>
|
||||
#include <sys/support.h>
|
||||
|
||||
#include <crypto/blake2s.h>
|
||||
#include <zinc/chacha20poly1305.h>
|
||||
#include <crypto/curve25519.h>
|
||||
|
||||
#define NOISE_KEY_SIZE CURVE25519_KEY_SIZE
|
||||
#define NOISE_PSK_SIZE 32
|
||||
#define NOISE_MAC_SIZE CHACHA20POLY1305_AUTHTAG_SIZE
|
||||
#define NOISE_HASH_SIZE BLAKE2S_HASH_SIZE
|
||||
#define NOISE_SYMMETRIC_SIZE CHACHA20POLY1305_KEY_SIZE
|
||||
#define NOISE_TIMESTAMP_SIZE 12
|
||||
|
||||
/* Protocol string constants */
|
||||
#define NOISE_HANDSHAKE_NAME "Noise_IKpsk2_25519_ChaChaPoly_BLAKE2s"
|
||||
#define NOISE_IDENTIFIER_NAME "WireGuard v1 zx2c4 Jason@zx2c4.com"
|
||||
|
||||
/* Constants for the counter */
|
||||
#define COUNTER_TYPE size_t
|
||||
#define COUNTER_BITS_TOTAL 512
|
||||
#define COUNTER_TYPE_BITS (sizeof(COUNTER_TYPE) * 8)
|
||||
#define COUNTER_TYPE_NUM (COUNTER_BITS_TOTAL / COUNTER_TYPE_BITS)
|
||||
#define COUNTER_WINDOW_SIZE (COUNTER_BITS_TOTAL - COUNTER_TYPE_BITS)
|
||||
|
||||
/* Constants for the keypair */
|
||||
#define REKEY_AFTER_MESSAGES (1ull << 60)
|
||||
#define REJECT_AFTER_MESSAGES (UINT64_MAX - COUNTER_WINDOW_SIZE - 1)
|
||||
#define REKEY_AFTER_TIME 120
|
||||
#define REKEY_AFTER_TIME_RECV 165
|
||||
#define REJECT_AFTER_TIME 180
|
||||
#define REJECT_INTERVAL (1000000000 / 50) /* fifty times per sec */
|
||||
/* 24 = floor(log2(REJECT_INTERVAL)) */
|
||||
#define REJECT_INTERVAL_MASK (~((1ull<<24)-1))
|
||||
|
||||
enum noise_state_hs {
|
||||
HS_ZEROED = 0,
|
||||
CREATED_INITIATION,
|
||||
CONSUMED_INITIATION,
|
||||
CREATED_RESPONSE,
|
||||
CONSUMED_RESPONSE,
|
||||
};
|
||||
|
||||
struct noise_handshake {
|
||||
enum noise_state_hs hs_state;
|
||||
uint32_t hs_local_index;
|
||||
uint32_t hs_remote_index;
|
||||
uint8_t hs_e[NOISE_KEY_SIZE];
|
||||
uint8_t hs_hash[NOISE_HASH_SIZE];
|
||||
uint8_t hs_ck[NOISE_HASH_SIZE];
|
||||
};
|
||||
|
||||
struct noise_counter {
|
||||
struct rwlock c_lock;
|
||||
uint64_t c_send;
|
||||
uint64_t c_recv;
|
||||
COUNTER_TYPE c_backtrack[COUNTER_TYPE_NUM];
|
||||
};
|
||||
|
||||
enum noise_state_kp {
|
||||
KP_ZEROED = 0,
|
||||
INITIATOR,
|
||||
RESPONDER,
|
||||
};
|
||||
|
||||
struct noise_keypair {
|
||||
SLIST_ENTRY(noise_keypair) kp_entry;
|
||||
int kp_valid;
|
||||
int kp_is_initiator;
|
||||
uint32_t kp_local_index;
|
||||
uint32_t kp_remote_index;
|
||||
uint8_t kp_send[NOISE_SYMMETRIC_SIZE];
|
||||
uint8_t kp_recv[NOISE_SYMMETRIC_SIZE];
|
||||
struct timespec kp_birthdate; /* nanouptime */
|
||||
struct noise_counter kp_ctr;
|
||||
};
|
||||
|
||||
struct noise_remote {
|
||||
uint8_t r_public[NOISE_KEY_SIZE];
|
||||
struct noise_local *r_local;
|
||||
uint8_t r_ss[NOISE_KEY_SIZE];
|
||||
|
||||
struct rwlock r_handshake_lock;
|
||||
struct noise_handshake r_handshake;
|
||||
uint8_t r_psk[NOISE_PSK_SIZE];
|
||||
uint8_t r_timestamp[NOISE_TIMESTAMP_SIZE];
|
||||
struct timespec r_last_init; /* nanouptime */
|
||||
|
||||
struct rwlock r_keypair_lock;
|
||||
SLIST_HEAD(,noise_keypair) r_unused_keypairs;
|
||||
struct noise_keypair *r_next, *r_current, *r_previous;
|
||||
struct noise_keypair r_keypair[3]; /* 3: next, current, previous. */
|
||||
|
||||
};
|
||||
|
||||
struct noise_local {
|
||||
struct rwlock l_identity_lock;
|
||||
int l_has_identity;
|
||||
uint8_t l_public[NOISE_KEY_SIZE];
|
||||
uint8_t l_private[NOISE_KEY_SIZE];
|
||||
|
||||
struct noise_upcall {
|
||||
void *u_arg;
|
||||
struct noise_remote *
|
||||
(*u_remote_get)(void *, uint8_t[NOISE_KEY_SIZE]);
|
||||
uint32_t
|
||||
(*u_index_set)(void *, struct noise_remote *);
|
||||
void (*u_index_drop)(void *, uint32_t);
|
||||
} l_upcall;
|
||||
};
|
||||
|
||||
struct noise_initiation {
|
||||
uint32_t s_idx;
|
||||
uint8_t ue[NOISE_KEY_SIZE];
|
||||
uint8_t es[NOISE_KEY_SIZE + NOISE_MAC_SIZE];
|
||||
uint8_t ets[NOISE_TIMESTAMP_SIZE + NOISE_MAC_SIZE];
|
||||
} __packed;
|
||||
|
||||
struct noise_response {
|
||||
uint32_t s_idx;
|
||||
uint32_t r_idx;
|
||||
uint8_t ue[NOISE_KEY_SIZE];
|
||||
uint8_t en[0 + NOISE_MAC_SIZE];
|
||||
} __packed;
|
||||
|
||||
struct noise_data {
|
||||
uint32_t r_idx;
|
||||
uint64_t nonce;
|
||||
uint8_t buf[];
|
||||
} __packed;
|
||||
|
||||
|
||||
/* Set/Get noise parameters */
|
||||
void noise_local_init(struct noise_local *, struct noise_upcall *);
|
||||
void noise_local_lock_identity(struct noise_local *);
|
||||
void noise_local_unlock_identity(struct noise_local *);
|
||||
int noise_local_set_private(struct noise_local *, uint8_t[NOISE_KEY_SIZE]);
|
||||
int noise_local_keys(struct noise_local *, uint8_t[NOISE_KEY_SIZE],
|
||||
uint8_t[NOISE_KEY_SIZE]);
|
||||
|
||||
void noise_remote_init(struct noise_remote *, const uint8_t[NOISE_KEY_SIZE],
|
||||
struct noise_local *);
|
||||
int noise_remote_set_psk(struct noise_remote *, const uint8_t[NOISE_PSK_SIZE]);
|
||||
int noise_remote_keys(struct noise_remote *, uint8_t[NOISE_KEY_SIZE],
|
||||
uint8_t[NOISE_PSK_SIZE]);
|
||||
|
||||
/* Should be called anytime noise_local_set_private is called */
|
||||
void noise_remote_precompute(struct noise_remote *);
|
||||
|
||||
/* Cryptographic functions */
|
||||
int noise_create_initiation(
|
||||
struct noise_remote *,
|
||||
struct noise_initiation *);
|
||||
|
||||
int noise_consume_initiation(
|
||||
struct noise_local *,
|
||||
struct noise_remote **,
|
||||
struct noise_initiation *);
|
||||
|
||||
int noise_create_response(
|
||||
struct noise_remote *,
|
||||
struct noise_response *);
|
||||
|
||||
int noise_consume_response(
|
||||
struct noise_remote *,
|
||||
struct noise_response *);
|
||||
|
||||
int noise_remote_begin_session(struct noise_remote *);
|
||||
void noise_remote_clear(struct noise_remote *);
|
||||
void noise_remote_expire_current(struct noise_remote *);
|
||||
|
||||
int noise_remote_ready(struct noise_remote *);
|
||||
|
||||
int noise_remote_encrypt(
|
||||
struct noise_remote *,
|
||||
struct noise_data *,
|
||||
size_t);
|
||||
int noise_remote_decrypt(
|
||||
struct noise_remote *,
|
||||
struct noise_data *,
|
||||
size_t);
|
||||
|
||||
#endif /* __NOISE_H__ */
|
50
sys/dev/if_wg/include/zinc/blake2s.h
Normal file
50
sys/dev/if_wg/include/zinc/blake2s.h
Normal file
@ -0,0 +1,50 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _ZINC_BLAKE2S_H
|
||||
#define _ZINC_BLAKE2S_H
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
enum blake2s_lengths {
|
||||
BLAKE2S_BLOCK_SIZE = 64,
|
||||
BLAKE2S_HASH_SIZE = 32,
|
||||
BLAKE2S_KEY_SIZE = 32
|
||||
};
|
||||
|
||||
struct blake2s_state {
|
||||
uint32_t h[8];
|
||||
uint32_t t[2];
|
||||
uint32_t f[2];
|
||||
uint8_t buf[BLAKE2S_BLOCK_SIZE];
|
||||
unsigned int buflen;
|
||||
unsigned int outlen;
|
||||
};
|
||||
|
||||
void blake2s_init(struct blake2s_state *state, const size_t outlen);
|
||||
void blake2s_init_key(struct blake2s_state *state, const size_t outlen,
|
||||
const void *key, const size_t keylen);
|
||||
void blake2s_update(struct blake2s_state *state, const uint8_t *in, size_t inlen);
|
||||
//void blake2s_final(struct blake2s_state *state, uint8_t *out);
|
||||
|
||||
static inline void blake2s(uint8_t *out, const uint8_t *in, const uint8_t *key,
|
||||
const size_t outlen, const size_t inlen,
|
||||
const size_t keylen)
|
||||
{
|
||||
struct blake2s_state state;
|
||||
|
||||
if (keylen)
|
||||
blake2s_init_key(&state, outlen, key, keylen);
|
||||
else
|
||||
blake2s_init(&state, outlen);
|
||||
|
||||
blake2s_update(&state, in, inlen);
|
||||
blake2s_final(&state, out);
|
||||
}
|
||||
|
||||
void blake2s_hmac(uint8_t *out, const uint8_t *in, const uint8_t *key, const size_t outlen,
|
||||
const size_t inlen, const size_t keylen);
|
||||
|
||||
#endif /* _ZINC_BLAKE2S_H */
|
68
sys/dev/if_wg/include/zinc/chacha20.h
Normal file
68
sys/dev/if_wg/include/zinc/chacha20.h
Normal file
@ -0,0 +1,68 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _ZINC_CHACHA20_H
|
||||
#define _ZINC_CHACHA20_H
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/support.h>
|
||||
|
||||
enum chacha20_lengths {
|
||||
CHACHA20_NONCE_SIZE = 16,
|
||||
CHACHA20_KEY_SIZE = 32,
|
||||
CHACHA20_KEY_WORDS = CHACHA20_KEY_SIZE / sizeof(u32),
|
||||
CHACHA20_BLOCK_SIZE = 64,
|
||||
CHACHA20_BLOCK_WORDS = CHACHA20_BLOCK_SIZE / sizeof(u32),
|
||||
HCHACHA20_NONCE_SIZE = CHACHA20_NONCE_SIZE,
|
||||
HCHACHA20_KEY_SIZE = CHACHA20_KEY_SIZE
|
||||
};
|
||||
|
||||
enum chacha20_constants { /* expand 32-byte k */
|
||||
CHACHA20_CONSTANT_EXPA = 0x61707865U,
|
||||
CHACHA20_CONSTANT_ND_3 = 0x3320646eU,
|
||||
CHACHA20_CONSTANT_2_BY = 0x79622d32U,
|
||||
CHACHA20_CONSTANT_TE_K = 0x6b206574U
|
||||
};
|
||||
|
||||
struct chacha20_ctx {
|
||||
union {
|
||||
u32 state[16];
|
||||
struct {
|
||||
u32 constant[4];
|
||||
u32 key[8];
|
||||
u32 counter[4];
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
static inline void chacha20_init(struct chacha20_ctx *ctx,
|
||||
const u8 key[CHACHA20_KEY_SIZE],
|
||||
const u64 nonce)
|
||||
{
|
||||
ctx->constant[0] = CHACHA20_CONSTANT_EXPA;
|
||||
ctx->constant[1] = CHACHA20_CONSTANT_ND_3;
|
||||
ctx->constant[2] = CHACHA20_CONSTANT_2_BY;
|
||||
ctx->constant[3] = CHACHA20_CONSTANT_TE_K;
|
||||
ctx->key[0] = get_unaligned_le32(key + 0);
|
||||
ctx->key[1] = get_unaligned_le32(key + 4);
|
||||
ctx->key[2] = get_unaligned_le32(key + 8);
|
||||
ctx->key[3] = get_unaligned_le32(key + 12);
|
||||
ctx->key[4] = get_unaligned_le32(key + 16);
|
||||
ctx->key[5] = get_unaligned_le32(key + 20);
|
||||
ctx->key[6] = get_unaligned_le32(key + 24);
|
||||
ctx->key[7] = get_unaligned_le32(key + 28);
|
||||
ctx->counter[0] = 0;
|
||||
ctx->counter[1] = 0;
|
||||
ctx->counter[2] = nonce & U32_MAX;
|
||||
ctx->counter[3] = nonce >> 32;
|
||||
}
|
||||
void chacha20(struct chacha20_ctx *ctx, u8 *dst, const u8 *src, u32 len,
|
||||
simd_context_t *simd_context);
|
||||
|
||||
void hchacha20(u32 derived_key[CHACHA20_KEY_WORDS],
|
||||
const u8 nonce[HCHACHA20_NONCE_SIZE],
|
||||
const u8 key[HCHACHA20_KEY_SIZE], simd_context_t *simd_context);
|
||||
|
||||
#endif /* _ZINC_CHACHA20_H */
|
48
sys/dev/if_wg/include/zinc/chacha20poly1305.h
Normal file
48
sys/dev/if_wg/include/zinc/chacha20poly1305.h
Normal file
@ -0,0 +1,48 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _ZINC_CHACHA20POLY1305_H
|
||||
#define _ZINC_CHACHA20POLY1305_H
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
struct scatterlist;
|
||||
|
||||
enum chacha20poly1305_lengths {
|
||||
XCHACHA20POLY1305_NONCE_SIZE = 24,
|
||||
CHACHA20POLY1305_KEY_SIZE = 32,
|
||||
CHACHA20POLY1305_AUTHTAG_SIZE = 16
|
||||
};
|
||||
|
||||
void chacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src, const size_t src_len,
|
||||
const uint8_t *ad, const size_t ad_len,
|
||||
const uint64_t nonce,
|
||||
const uint8_t key[CHACHA20POLY1305_KEY_SIZE]);
|
||||
|
||||
bool chacha20poly1305_encrypt_sg_inplace(
|
||||
struct scatterlist *src, const size_t src_len, const uint8_t *ad,
|
||||
const size_t ad_len, const uint64_t nonce,
|
||||
const uint8_t key[CHACHA20POLY1305_KEY_SIZE], simd_context_t *simd_context);
|
||||
|
||||
bool chacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src, const size_t src_len,
|
||||
const uint8_t *ad, const size_t ad_len, const uint64_t nonce,
|
||||
const uint8_t key[CHACHA20POLY1305_KEY_SIZE]);
|
||||
|
||||
bool chacha20poly1305_decrypt_sg_inplace(
|
||||
struct scatterlist *src, size_t src_len, const uint8_t *ad,
|
||||
const size_t ad_len, const uint64_t nonce,
|
||||
const uint8_t key[CHACHA20POLY1305_KEY_SIZE], simd_context_t *simd_context);
|
||||
|
||||
void xchacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src, const size_t src_len,
|
||||
const uint8_t *ad, const size_t ad_len,
|
||||
const uint8_t nonce[XCHACHA20POLY1305_NONCE_SIZE],
|
||||
const uint8_t key[CHACHA20POLY1305_KEY_SIZE]);
|
||||
|
||||
bool xchacha20poly1305_decrypt(
|
||||
uint8_t *dst, const uint8_t *src, const size_t src_len, const uint8_t *ad,
|
||||
const size_t ad_len, const uint8_t nonce[XCHACHA20POLY1305_NONCE_SIZE],
|
||||
const uint8_t key[CHACHA20POLY1305_KEY_SIZE]);
|
||||
|
||||
#endif /* _ZINC_CHACHA20POLY1305_H */
|
28
sys/dev/if_wg/include/zinc/curve25519.h
Normal file
28
sys/dev/if_wg/include/zinc/curve25519.h
Normal file
@ -0,0 +1,28 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _ZINC_CURVE25519_H
|
||||
#define _ZINC_CURVE25519_H
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
enum curve25519_lengths {
|
||||
CURVE25519_KEY_SIZE = 32
|
||||
};
|
||||
|
||||
bool curve25519(uint8_t mypublic[CURVE25519_KEY_SIZE],
|
||||
const uint8_t secret[CURVE25519_KEY_SIZE],
|
||||
const uint8_t basepoint[CURVE25519_KEY_SIZE]);
|
||||
void curve25519_generate_secret(uint8_t secret[CURVE25519_KEY_SIZE]);
|
||||
bool curve25519_generate_public(
|
||||
uint8_t pub[CURVE25519_KEY_SIZE], const uint8_t secret[CURVE25519_KEY_SIZE]);
|
||||
|
||||
/*
 * Clamp a Curve25519 secret in place: clear the three low bits of the first
 * byte, clear the top bit of the last byte and set its second-highest bit.
 * All other bytes are left untouched.
 */
static inline void curve25519_clamp_secret(uint8_t secret[CURVE25519_KEY_SIZE])
{
	uint8_t last = secret[31];

	last &= 0x7f;
	last |= 0x40;

	secret[0] &= 0xf8;
	secret[31] = last;
}
|
||||
|
||||
#endif /* _ZINC_CURVE25519_H */
|
29
sys/dev/if_wg/include/zinc/poly1305.h
Normal file
29
sys/dev/if_wg/include/zinc/poly1305.h
Normal file
@ -0,0 +1,29 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _ZINC_POLY1305_H
|
||||
#define _ZINC_POLY1305_H
|
||||
|
||||
|
||||
enum poly1305_lengths {
|
||||
POLY1305_BLOCK_SIZE = 16,
|
||||
POLY1305_KEY_SIZE = 32,
|
||||
POLY1305_MAC_SIZE = 16
|
||||
};
|
||||
|
||||
struct poly1305_ctx {
|
||||
u8 opaque[24 * sizeof(u64)];
|
||||
u32 nonce[4];
|
||||
u8 data[POLY1305_BLOCK_SIZE];
|
||||
size_t num;
|
||||
} __aligned(8);
|
||||
|
||||
void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE]);
|
||||
void poly1305_update(struct poly1305_ctx *ctx, const u8 *input, size_t len,
|
||||
simd_context_t *simd_context);
|
||||
void poly1305_final(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE],
|
||||
simd_context_t *simd_context);
|
||||
|
||||
#endif /* _ZINC_POLY1305_H */
|
256
sys/dev/if_wg/module/blake2s.c
Normal file
256
sys/dev/if_wg/module/blake2s.c
Normal file
@ -0,0 +1,256 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR MIT
|
||||
/*
|
||||
* Copyright (C) 2012 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*
|
||||
* This is an implementation of the BLAKE2s hash and PRF functions.
|
||||
*
|
||||
* Information: https://blake2.net/
|
||||
*
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/endian.h>
|
||||
|
||||
#include <crypto/blake2s.h>
|
||||
|
||||
/*
 * Rotate a 32-bit word right by `shift` bits.
 *
 * Both shift counts are reduced modulo 32, so a rotation by 0 (or by any
 * multiple of 32) returns `word` unchanged instead of shifting a 32-bit
 * value by 32 bits, which is undefined behaviour in C.
 */
static inline uint32_t
ror32(uint32_t word, unsigned int shift)
{
	return (word >> (shift & 31)) | (word << ((-shift) & 31));
}
|
||||
|
||||
typedef union {
|
||||
struct {
|
||||
uint8_t digest_length;
|
||||
uint8_t key_length;
|
||||
uint8_t fanout;
|
||||
uint8_t depth;
|
||||
uint32_t leaf_length;
|
||||
uint32_t node_offset;
|
||||
uint16_t xof_length;
|
||||
uint8_t node_depth;
|
||||
uint8_t inner_length;
|
||||
uint8_t salt[8];
|
||||
uint8_t personal[8];
|
||||
};
|
||||
uint32_t words[8];
|
||||
} __packed blake2s_param;
|
||||
|
||||
static const uint32_t blake2s_iv[8] = {
|
||||
0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
|
||||
0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
|
||||
};
|
||||
|
||||
static const uint8_t blake2s_sigma[10][16] = {
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
|
||||
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
|
||||
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
|
||||
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
|
||||
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
|
||||
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
|
||||
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
|
||||
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
|
||||
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
|
||||
};
|
||||
|
||||
static inline void blake2s_set_lastblock(struct blake2s_state *state)
|
||||
{
|
||||
if (state->last_node)
|
||||
state->f[1] = -1;
|
||||
state->f[0] = -1;
|
||||
}
|
||||
|
||||
static inline void blake2s_increment_counter(struct blake2s_state *state,
|
||||
const uint32_t inc)
|
||||
{
|
||||
state->t[0] += inc;
|
||||
state->t[1] += (state->t[0] < inc);
|
||||
}
|
||||
|
||||
static inline void blake2s_init_param(struct blake2s_state *state,
|
||||
const blake2s_param *param)
|
||||
{
|
||||
int i;
|
||||
|
||||
memset(state, 0, sizeof(*state));
|
||||
for (i = 0; i < 8; ++i)
|
||||
state->h[i] = blake2s_iv[i] ^ le32toh(param->words[i]);
|
||||
}
|
||||
|
||||
void blake2s_init(struct blake2s_state *state, const size_t outlen)
|
||||
{
|
||||
blake2s_param param __aligned(__alignof__(uint32_t)) = {
|
||||
.digest_length = outlen,
|
||||
.fanout = 1,
|
||||
.depth = 1
|
||||
};
|
||||
|
||||
/*WARN_ON(IS_ENABLED(DEBUG) && (!outlen || outlen > BLAKE2S_HASH_SIZE));*/
|
||||
blake2s_init_param(state, ¶m);
|
||||
}
|
||||
|
||||
void blake2s_init_key(struct blake2s_state *state, const size_t outlen,
|
||||
const void *key, const size_t keylen)
|
||||
{
|
||||
blake2s_param param = { .digest_length = outlen,
|
||||
.key_length = keylen,
|
||||
.fanout = 1,
|
||||
.depth = 1 };
|
||||
uint8_t block[BLAKE2S_BLOCK_SIZE] = { 0 };
|
||||
|
||||
/*WARN_ON(IS_ENABLED(DEBUG) && (!outlen || outlen > BLAKE2S_HASH_SIZE ||
|
||||
!key || !keylen || keylen > BLAKE2S_KEY_SIZE));*/
|
||||
blake2s_init_param(state, ¶m);
|
||||
memcpy(block, key, keylen);
|
||||
blake2s_update(state, block, BLAKE2S_BLOCK_SIZE);
|
||||
explicit_bzero(block, BLAKE2S_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
static inline void blake2s_compress(struct blake2s_state *state,
|
||||
const uint8_t *block, size_t nblocks,
|
||||
const uint32_t inc)
|
||||
{
|
||||
uint32_t m[16];
|
||||
uint32_t v[16];
|
||||
int i;
|
||||
|
||||
/*WARN_ON(IS_ENABLED(DEBUG) &&
|
||||
(nblocks > 1 && inc != BLAKE2S_BLOCK_SIZE));*/
|
||||
|
||||
while (nblocks > 0) {
|
||||
blake2s_increment_counter(state, inc);
|
||||
memcpy(m, block, BLAKE2S_BLOCK_SIZE);
|
||||
for(i = 0; i < (sizeof(m)/sizeof(m[0])); i++)
|
||||
(m[i]) = le32toh((m[i]));
|
||||
memcpy(v, state->h, 32);
|
||||
v[ 8] = blake2s_iv[0];
|
||||
v[ 9] = blake2s_iv[1];
|
||||
v[10] = blake2s_iv[2];
|
||||
v[11] = blake2s_iv[3];
|
||||
v[12] = blake2s_iv[4] ^ state->t[0];
|
||||
v[13] = blake2s_iv[5] ^ state->t[1];
|
||||
v[14] = blake2s_iv[6] ^ state->f[0];
|
||||
v[15] = blake2s_iv[7] ^ state->f[1];
|
||||
|
||||
#define G(r, i, a, b, c, d) do { \
|
||||
a += b + m[blake2s_sigma[r][2 * i + 0]]; \
|
||||
d = ror32(d ^ a, 16); \
|
||||
c += d; \
|
||||
b = ror32(b ^ c, 12); \
|
||||
a += b + m[blake2s_sigma[r][2 * i + 1]]; \
|
||||
d = ror32(d ^ a, 8); \
|
||||
c += d; \
|
||||
b = ror32(b ^ c, 7); \
|
||||
} while (0)
|
||||
|
||||
#define ROUND(r) do { \
|
||||
G(r, 0, v[0], v[ 4], v[ 8], v[12]); \
|
||||
G(r, 1, v[1], v[ 5], v[ 9], v[13]); \
|
||||
G(r, 2, v[2], v[ 6], v[10], v[14]); \
|
||||
G(r, 3, v[3], v[ 7], v[11], v[15]); \
|
||||
G(r, 4, v[0], v[ 5], v[10], v[15]); \
|
||||
G(r, 5, v[1], v[ 6], v[11], v[12]); \
|
||||
G(r, 6, v[2], v[ 7], v[ 8], v[13]); \
|
||||
G(r, 7, v[3], v[ 4], v[ 9], v[14]); \
|
||||
} while (0)
|
||||
ROUND(0);
|
||||
ROUND(1);
|
||||
ROUND(2);
|
||||
ROUND(3);
|
||||
ROUND(4);
|
||||
ROUND(5);
|
||||
ROUND(6);
|
||||
ROUND(7);
|
||||
ROUND(8);
|
||||
ROUND(9);
|
||||
|
||||
#undef G
|
||||
#undef ROUND
|
||||
|
||||
for (i = 0; i < 8; ++i)
|
||||
state->h[i] ^= v[i] ^ v[i + 8];
|
||||
|
||||
block += BLAKE2S_BLOCK_SIZE;
|
||||
--nblocks;
|
||||
}
|
||||
}
|
||||
|
||||
void blake2s_update(struct blake2s_state *state, const uint8_t *in, size_t inlen)
|
||||
{
|
||||
const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
|
||||
|
||||
if (!inlen)
|
||||
return;
|
||||
if (inlen > fill) {
|
||||
memcpy(state->buf + state->buflen, in, fill);
|
||||
blake2s_compress(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
|
||||
state->buflen = 0;
|
||||
in += fill;
|
||||
inlen -= fill;
|
||||
}
|
||||
if (inlen > BLAKE2S_BLOCK_SIZE) {
|
||||
const size_t nblocks =
|
||||
(inlen + BLAKE2S_BLOCK_SIZE - 1) / BLAKE2S_BLOCK_SIZE;
|
||||
/* Hash one less (full) block than strictly possible */
|
||||
blake2s_compress(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
|
||||
in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
|
||||
inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
|
||||
}
|
||||
memcpy(state->buf + state->buflen, in, inlen);
|
||||
state->buflen += inlen;
|
||||
}
|
||||
|
||||
void blake2s_final(struct blake2s_state *state, uint8_t *out, const size_t outlen)
|
||||
{
|
||||
int i;
|
||||
/*WARN_ON(IS_ENABLED(DEBUG) &&
|
||||
(!out || !outlen || outlen > BLAKE2S_HASH_SIZE));*/
|
||||
blake2s_set_lastblock(state);
|
||||
memset(state->buf + state->buflen, 0,
|
||||
BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
|
||||
blake2s_compress(state, state->buf, 1, state->buflen);
|
||||
for(i = 0; i < (sizeof(state->h)/sizeof(state->h[0])); i++)
|
||||
(state->h[i]) = htole32((state->h[i]));
|
||||
|
||||
memcpy(out, state->h, outlen);
|
||||
explicit_bzero(state, sizeof(*state));
|
||||
}
|
||||
|
||||
void blake2s_hmac(uint8_t *out, const uint8_t *in, const uint8_t *key, const size_t outlen,
|
||||
const size_t inlen, const size_t keylen)
|
||||
{
|
||||
struct blake2s_state state;
|
||||
uint8_t x_key[BLAKE2S_BLOCK_SIZE] __aligned(__alignof__(uint32_t)) = { 0 };
|
||||
uint8_t i_hash[BLAKE2S_HASH_SIZE] __aligned(__alignof__(uint32_t));
|
||||
int i;
|
||||
|
||||
if (keylen > BLAKE2S_BLOCK_SIZE) {
|
||||
blake2s_init(&state, BLAKE2S_HASH_SIZE);
|
||||
blake2s_update(&state, key, keylen);
|
||||
blake2s_final(&state, x_key, BLAKE2S_HASH_SIZE);
|
||||
} else
|
||||
memcpy(x_key, key, keylen);
|
||||
|
||||
for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i)
|
||||
x_key[i] ^= 0x36;
|
||||
|
||||
blake2s_init(&state, BLAKE2S_HASH_SIZE);
|
||||
blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE);
|
||||
blake2s_update(&state, in, inlen);
|
||||
blake2s_final(&state, i_hash, BLAKE2S_HASH_SIZE);
|
||||
|
||||
for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i)
|
||||
x_key[i] ^= 0x5c ^ 0x36;
|
||||
|
||||
blake2s_init(&state, BLAKE2S_HASH_SIZE);
|
||||
blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE);
|
||||
blake2s_update(&state, i_hash, BLAKE2S_HASH_SIZE);
|
||||
blake2s_final(&state, i_hash, BLAKE2S_HASH_SIZE);
|
||||
|
||||
memcpy(out, i_hash, outlen);
|
||||
explicit_bzero(x_key, BLAKE2S_BLOCK_SIZE);
|
||||
explicit_bzero(i_hash, BLAKE2S_HASH_SIZE);
|
||||
}
|
58
sys/dev/if_wg/module/blake2s.h
Normal file
58
sys/dev/if_wg/module/blake2s.h
Normal file
@ -0,0 +1,58 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#ifndef _BLAKE2S_H_
|
||||
#define _BLAKE2S_H_
|
||||
|
||||
/*#define WARN_ON(a) if(a) printf("%s failed at %s:%d\n", #a, __FILE__, __LINE__)
|
||||
#define IS_ENABLED(...) true*/
|
||||
|
||||
|
||||
enum blake2s_lengths {
|
||||
BLAKE2S_BLOCK_SIZE = 64,
|
||||
BLAKE2S_HASH_SIZE = 32,
|
||||
BLAKE2S_KEY_SIZE = 32
|
||||
};
|
||||
|
||||
struct blake2s_state {
|
||||
uint32_t h[8];
|
||||
uint32_t t[2];
|
||||
uint32_t f[2];
|
||||
uint8_t buf[BLAKE2S_BLOCK_SIZE];
|
||||
size_t buflen;
|
||||
uint8_t last_node;
|
||||
};
|
||||
|
||||
void blake2s_init(struct blake2s_state *state, const size_t outlen);
|
||||
void blake2s_init_key(struct blake2s_state *state, const size_t outlen,
|
||||
const void *key, const size_t keylen);
|
||||
void blake2s_update(struct blake2s_state *state, const uint8_t *in, size_t inlen);
|
||||
void blake2s_final(struct blake2s_state *state, uint8_t *out, const size_t outlen);
|
||||
|
||||
static inline void blake2s(uint8_t *out, const uint8_t *in, const uint8_t *key,
|
||||
const size_t outlen, const size_t inlen,
|
||||
const size_t keylen)
|
||||
{
|
||||
struct blake2s_state state;
|
||||
|
||||
/*WARN_ON(IS_ENABLED(DEBUG) && ((!in && inlen > 0) || !out || !outlen ||
|
||||
outlen > BLAKE2S_HASH_SIZE || keylen > BLAKE2S_KEY_SIZE ||
|
||||
(!key && keylen)));*/
|
||||
|
||||
if (keylen)
|
||||
blake2s_init_key(&state, outlen, key, keylen);
|
||||
else
|
||||
blake2s_init(&state, outlen);
|
||||
|
||||
blake2s_update(&state, in, inlen);
|
||||
blake2s_final(&state, out, outlen);
|
||||
}
|
||||
|
||||
void blake2s_hmac(uint8_t *out, const uint8_t *in, const uint8_t *key,
|
||||
const size_t outlen, const size_t inlen, const size_t keylen);
|
||||
|
||||
#endif /* _BLAKE2S_H_ */
|
2834
sys/dev/if_wg/module/chacha20-x86_64.S
Normal file
2834
sys/dev/if_wg/module/chacha20-x86_64.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,98 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR MIT
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#include <asm/hwcap.h>
|
||||
#include <asm/neon.h>
|
||||
#if defined(CONFIG_ZINC_ARCH_ARM)
|
||||
#include <asm/system_info.h>
|
||||
#include <asm/cputype.h>
|
||||
#endif
|
||||
|
||||
asmlinkage void chacha20_arm(u8 *out, const u8 *in, const size_t len,
|
||||
const u32 key[8], const u32 counter[4]);
|
||||
asmlinkage void hchacha20_arm(const u32 state[16], u32 out[8]);
|
||||
asmlinkage void chacha20_neon(u8 *out, const u8 *in, const size_t len,
|
||||
const u32 key[8], const u32 counter[4]);
|
||||
|
||||
static bool chacha20_use_neon __ro_after_init;
|
||||
static bool *const chacha20_nobs[] __initconst = { &chacha20_use_neon };
|
||||
/*
 * Decide at init time whether the NEON ChaCha20 routine may be selected.
 *
 * arm64:      chacha20_use_neon follows the ASIMD CPU feature.
 * 32-bit ARM: chacha20_use_neon follows the NEON hwcap bit, except on
 *             Cortex-A5/A7 parts, where the flag is not written at all.
 * Any other configuration leaves the flag untouched.
 */
static void __init chacha20_fpu_init(void)
{
#if defined(CONFIG_ZINC_ARCH_ARM64)
	chacha20_use_neon = cpu_have_named_feature(ASIMD);
#elif defined(CONFIG_ZINC_ARCH_ARM)
	switch (read_cpuid_part()) {
	default:
		chacha20_use_neon = elf_hwcap & HWCAP_NEON;
		break;
	case ARM_CPU_PART_CORTEX_A5:
	case ARM_CPU_PART_CORTEX_A7:
		/*
		 * On these two cores the NEON implementation performs poorly
		 * while the scalar one performs very well and uses less
		 * power, so the flag is deliberately not set here.
		 */
		break;
	}
#endif
}
|
||||
|
||||
static inline bool chacha20_arch(struct chacha20_ctx *ctx, u8 *dst,
|
||||
const u8 *src, size_t len,
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
/* SIMD disables preemption, so relax after processing each page. */
|
||||
BUILD_BUG_ON(PAGE_SIZE < CHACHA20_BLOCK_SIZE ||
|
||||
PAGE_SIZE % CHACHA20_BLOCK_SIZE);
|
||||
|
||||
for (;;) {
|
||||
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && chacha20_use_neon &&
|
||||
len >= CHACHA20_BLOCK_SIZE * 3 && simd_use(simd_context)) {
|
||||
const size_t bytes = min_t(size_t, len, PAGE_SIZE);
|
||||
|
||||
chacha20_neon(dst, src, bytes, ctx->key, ctx->counter);
|
||||
ctx->counter[0] += (bytes + 63) / 64;
|
||||
len -= bytes;
|
||||
if (!len)
|
||||
break;
|
||||
dst += bytes;
|
||||
src += bytes;
|
||||
simd_relax(simd_context);
|
||||
} else {
|
||||
chacha20_arm(dst, src, len, ctx->key, ctx->counter);
|
||||
ctx->counter[0] += (len + 63) / 64;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool hchacha20_arch(u32 derived_key[CHACHA20_KEY_WORDS],
|
||||
const u8 nonce[HCHACHA20_NONCE_SIZE],
|
||||
const u8 key[HCHACHA20_KEY_SIZE],
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_ZINC_ARCH_ARM)) {
|
||||
u32 x[] = { CHACHA20_CONSTANT_EXPA,
|
||||
CHACHA20_CONSTANT_ND_3,
|
||||
CHACHA20_CONSTANT_2_BY,
|
||||
CHACHA20_CONSTANT_TE_K,
|
||||
get_unaligned_le32(key + 0),
|
||||
get_unaligned_le32(key + 4),
|
||||
get_unaligned_le32(key + 8),
|
||||
get_unaligned_le32(key + 12),
|
||||
get_unaligned_le32(key + 16),
|
||||
get_unaligned_le32(key + 20),
|
||||
get_unaligned_le32(key + 24),
|
||||
get_unaligned_le32(key + 28),
|
||||
get_unaligned_le32(nonce + 0),
|
||||
get_unaligned_le32(nonce + 4),
|
||||
get_unaligned_le32(nonce + 8),
|
||||
get_unaligned_le32(nonce + 12)
|
||||
};
|
||||
hchacha20_arm(x, derived_key);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
1227
sys/dev/if_wg/module/crypto/zinc/chacha20/chacha20-arm.pl
Executable file
1227
sys/dev/if_wg/module/crypto/zinc/chacha20/chacha20-arm.pl
Executable file
File diff suppressed because it is too large
Load Diff
1163
sys/dev/if_wg/module/crypto/zinc/chacha20/chacha20-arm64.pl
Executable file
1163
sys/dev/if_wg/module/crypto/zinc/chacha20/chacha20-arm64.pl
Executable file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,27 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR MIT
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*/
|
||||
|
||||
asmlinkage void chacha20_mips(u32 state[16], u8 *out, const u8 *in,
|
||||
const size_t len);
|
||||
static bool *const chacha20_nobs[] __initconst = { };
|
||||
/*
 * Init hook for the chacha20_mips() glue.  No capability flags are defined
 * for this implementation (chacha20_nobs is empty), so there is nothing to
 * probe.
 */
static void __init chacha20_fpu_init(void)
{
	/* Intentionally empty. */
}
|
||||
|
||||
static inline bool chacha20_arch(struct chacha20_ctx *ctx, u8 *dst,
|
||||
const u8 *src, size_t len,
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
chacha20_mips(ctx->state, dst, src, len);
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool hchacha20_arch(u32 derived_key[CHACHA20_KEY_WORDS],
|
||||
const u8 nonce[HCHACHA20_NONCE_SIZE],
|
||||
const u8 key[HCHACHA20_KEY_SIZE],
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
return false;
|
||||
}
|
424
sys/dev/if_wg/module/crypto/zinc/chacha20/chacha20-mips.S
Normal file
424
sys/dev/if_wg/module/crypto/zinc/chacha20/chacha20-mips.S
Normal file
@ -0,0 +1,424 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
|
||||
/*
|
||||
* Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com>. All Rights Reserved.
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#define MASK_U32 0x3c
|
||||
#define CHACHA20_BLOCK_SIZE 64
|
||||
#define STACK_SIZE 32
|
||||
|
||||
#define X0 $t0
|
||||
#define X1 $t1
|
||||
#define X2 $t2
|
||||
#define X3 $t3
|
||||
#define X4 $t4
|
||||
#define X5 $t5
|
||||
#define X6 $t6
|
||||
#define X7 $t7
|
||||
#define X8 $t8
|
||||
#define X9 $t9
|
||||
#define X10 $v1
|
||||
#define X11 $s6
|
||||
#define X12 $s5
|
||||
#define X13 $s4
|
||||
#define X14 $s3
|
||||
#define X15 $s2
|
||||
/* Use regs which are overwritten on exit for Tx so we don't leak clear data. */
|
||||
#define T0 $s1
|
||||
#define T1 $s0
|
||||
#define T(n) T ## n
|
||||
#define X(n) X ## n
|
||||
|
||||
/* Input arguments */
|
||||
#define STATE $a0
|
||||
#define OUT $a1
|
||||
#define IN $a2
|
||||
#define BYTES $a3
|
||||
|
||||
/* Output argument */
|
||||
/* NONCE[0] is kept in a register and not in memory.
|
||||
* We don't want to touch original value in memory.
|
||||
* Must be incremented every loop iteration.
|
||||
*/
|
||||
#define NONCE_0 $v0
|
||||
|
||||
/* SAVED_X and SAVED_CA are set in the jump table.
|
||||
* Use regs which are overwritten on exit so we don't leak clear data.
|
||||
* They are used to handle the last bytes, which are not a multiple of 4.
|
||||
*/
|
||||
#define SAVED_X X15
|
||||
#define SAVED_CA $s7
|
||||
|
||||
#define IS_UNALIGNED $s7
|
||||
|
||||
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
#define MSB 0
|
||||
#define LSB 3
|
||||
#define ROTx rotl
|
||||
#define ROTR(n) rotr n, 24
|
||||
#define CPU_TO_LE32(n) \
|
||||
wsbh n; \
|
||||
rotr n, 16;
|
||||
#else
|
||||
#define MSB 3
|
||||
#define LSB 0
|
||||
#define ROTx rotr
|
||||
#define CPU_TO_LE32(n)
|
||||
#define ROTR(n)
|
||||
#endif
|
||||
|
||||
#define FOR_EACH_WORD(x) \
|
||||
x( 0); \
|
||||
x( 1); \
|
||||
x( 2); \
|
||||
x( 3); \
|
||||
x( 4); \
|
||||
x( 5); \
|
||||
x( 6); \
|
||||
x( 7); \
|
||||
x( 8); \
|
||||
x( 9); \
|
||||
x(10); \
|
||||
x(11); \
|
||||
x(12); \
|
||||
x(13); \
|
||||
x(14); \
|
||||
x(15);
|
||||
|
||||
#define FOR_EACH_WORD_REV(x) \
|
||||
x(15); \
|
||||
x(14); \
|
||||
x(13); \
|
||||
x(12); \
|
||||
x(11); \
|
||||
x(10); \
|
||||
x( 9); \
|
||||
x( 8); \
|
||||
x( 7); \
|
||||
x( 6); \
|
||||
x( 5); \
|
||||
x( 4); \
|
||||
x( 3); \
|
||||
x( 2); \
|
||||
x( 1); \
|
||||
x( 0);
|
||||
|
||||
#define PLUS_ONE_0 1
|
||||
#define PLUS_ONE_1 2
|
||||
#define PLUS_ONE_2 3
|
||||
#define PLUS_ONE_3 4
|
||||
#define PLUS_ONE_4 5
|
||||
#define PLUS_ONE_5 6
|
||||
#define PLUS_ONE_6 7
|
||||
#define PLUS_ONE_7 8
|
||||
#define PLUS_ONE_8 9
|
||||
#define PLUS_ONE_9 10
|
||||
#define PLUS_ONE_10 11
|
||||
#define PLUS_ONE_11 12
|
||||
#define PLUS_ONE_12 13
|
||||
#define PLUS_ONE_13 14
|
||||
#define PLUS_ONE_14 15
|
||||
#define PLUS_ONE_15 16
|
||||
#define PLUS_ONE(x) PLUS_ONE_ ## x
|
||||
#define _CONCAT3(a,b,c) a ## b ## c
|
||||
#define CONCAT3(a,b,c) _CONCAT3(a,b,c)
|
||||
|
||||
#define STORE_UNALIGNED(x) \
|
||||
CONCAT3(.Lchacha20_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
|
||||
.if (x != 12); \
|
||||
lw T0, (x*4)(STATE); \
|
||||
.endif; \
|
||||
lwl T1, (x*4)+MSB ## (IN); \
|
||||
lwr T1, (x*4)+LSB ## (IN); \
|
||||
.if (x == 12); \
|
||||
addu X ## x, NONCE_0; \
|
||||
.else; \
|
||||
addu X ## x, T0; \
|
||||
.endif; \
|
||||
CPU_TO_LE32(X ## x); \
|
||||
xor X ## x, T1; \
|
||||
swl X ## x, (x*4)+MSB ## (OUT); \
|
||||
swr X ## x, (x*4)+LSB ## (OUT);
|
||||
|
||||
#define STORE_ALIGNED(x) \
|
||||
CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
|
||||
.if (x != 12); \
|
||||
lw T0, (x*4)(STATE); \
|
||||
.endif; \
|
||||
lw T1, (x*4) ## (IN); \
|
||||
.if (x == 12); \
|
||||
addu X ## x, NONCE_0; \
|
||||
.else; \
|
||||
addu X ## x, T0; \
|
||||
.endif; \
|
||||
CPU_TO_LE32(X ## x); \
|
||||
xor X ## x, T1; \
|
||||
sw X ## x, (x*4) ## (OUT);
|
||||
|
||||
/* Jump table macro.
|
||||
* Used for setup and for handling the last bytes, which are not a multiple of 4.
|
||||
* X15 is free to store Xn
|
||||
* Every jumptable entry must be equal in size.
|
||||
*/
|
||||
#define JMPTBL_ALIGNED(x) \
|
||||
.Lchacha20_mips_jmptbl_aligned_ ## x: ; \
|
||||
.set noreorder; \
|
||||
b .Lchacha20_mips_xor_aligned_ ## x ## _b; \
|
||||
.if (x == 12); \
|
||||
addu SAVED_X, X ## x, NONCE_0; \
|
||||
.else; \
|
||||
addu SAVED_X, X ## x, SAVED_CA; \
|
||||
.endif; \
|
||||
.set reorder
|
||||
|
||||
#define JMPTBL_UNALIGNED(x) \
|
||||
.Lchacha20_mips_jmptbl_unaligned_ ## x: ; \
|
||||
.set noreorder; \
|
||||
b .Lchacha20_mips_xor_unaligned_ ## x ## _b; \
|
||||
.if (x == 12); \
|
||||
addu SAVED_X, X ## x, NONCE_0; \
|
||||
.else; \
|
||||
addu SAVED_X, X ## x, SAVED_CA; \
|
||||
.endif; \
|
||||
.set reorder
|
||||
|
||||
#define AXR(A, B, C, D, K, L, M, N, V, W, Y, Z, S) \
|
||||
addu X(A), X(K); \
|
||||
addu X(B), X(L); \
|
||||
addu X(C), X(M); \
|
||||
addu X(D), X(N); \
|
||||
xor X(V), X(A); \
|
||||
xor X(W), X(B); \
|
||||
xor X(Y), X(C); \
|
||||
xor X(Z), X(D); \
|
||||
rotl X(V), S; \
|
||||
rotl X(W), S; \
|
||||
rotl X(Y), S; \
|
||||
rotl X(Z), S;
|
||||
|
||||
.text
|
||||
.set reorder
|
||||
.set noat
|
||||
.globl chacha20_mips
|
||||
.ent chacha20_mips
|
||||
chacha20_mips:
|
||||
.frame $sp, STACK_SIZE, $ra
|
||||
|
||||
addiu $sp, -STACK_SIZE
|
||||
|
||||
/* Return early if BYTES == 0. */
|
||||
beqz BYTES, .Lchacha20_mips_end
|
||||
|
||||
lw NONCE_0, 48(STATE)
|
||||
|
||||
/* Save s0-s7 */
|
||||
sw $s0, 0($sp)
|
||||
sw $s1, 4($sp)
|
||||
sw $s2, 8($sp)
|
||||
sw $s3, 12($sp)
|
||||
sw $s4, 16($sp)
|
||||
sw $s5, 20($sp)
|
||||
sw $s6, 24($sp)
|
||||
sw $s7, 28($sp)
|
||||
|
||||
/* Test whether IN or OUT is unaligned.
|
||||
* IS_UNALIGNED = ( IN | OUT ) & 0x00000003
|
||||
*/
|
||||
or IS_UNALIGNED, IN, OUT
|
||||
andi IS_UNALIGNED, 0x3
|
||||
|
||||
/* Set number of rounds */
|
||||
li $at, 20
|
||||
|
||||
b .Lchacha20_rounds_start
|
||||
|
||||
.align 4
|
||||
.Loop_chacha20_rounds:
|
||||
addiu IN, CHACHA20_BLOCK_SIZE
|
||||
addiu OUT, CHACHA20_BLOCK_SIZE
|
||||
addiu NONCE_0, 1
|
||||
|
||||
.Lchacha20_rounds_start:
|
||||
lw X0, 0(STATE)
|
||||
lw X1, 4(STATE)
|
||||
lw X2, 8(STATE)
|
||||
lw X3, 12(STATE)
|
||||
|
||||
lw X4, 16(STATE)
|
||||
lw X5, 20(STATE)
|
||||
lw X6, 24(STATE)
|
||||
lw X7, 28(STATE)
|
||||
lw X8, 32(STATE)
|
||||
lw X9, 36(STATE)
|
||||
lw X10, 40(STATE)
|
||||
lw X11, 44(STATE)
|
||||
|
||||
move X12, NONCE_0
|
||||
lw X13, 52(STATE)
|
||||
lw X14, 56(STATE)
|
||||
lw X15, 60(STATE)
|
||||
|
||||
.Loop_chacha20_xor_rounds:
|
||||
addiu $at, -2
|
||||
AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16);
|
||||
AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12);
|
||||
AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8);
|
||||
AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7);
|
||||
AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16);
|
||||
AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12);
|
||||
AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8);
|
||||
AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7);
|
||||
bnez $at, .Loop_chacha20_xor_rounds
|
||||
|
||||
addiu BYTES, -(CHACHA20_BLOCK_SIZE)
|
||||
|
||||
/* Is data src/dst unaligned? Jump */
|
||||
bnez IS_UNALIGNED, .Loop_chacha20_unaligned
|
||||
|
||||
/* Set the number of rounds here to fill the delay slot. */
|
||||
li $at, 20
|
||||
|
||||
/* BYTES < 0, it has no full block. */
|
||||
bltz BYTES, .Lchacha20_mips_no_full_block_aligned
|
||||
|
||||
FOR_EACH_WORD_REV(STORE_ALIGNED)
|
||||
|
||||
/* BYTES > 0? Loop again. */
|
||||
bgtz BYTES, .Loop_chacha20_rounds
|
||||
|
||||
/* Place this here to fill delay slot */
|
||||
addiu NONCE_0, 1
|
||||
|
||||
/* BYTES < 0? Handle last bytes */
|
||||
bltz BYTES, .Lchacha20_mips_xor_bytes
|
||||
|
||||
.Lchacha20_mips_xor_done:
|
||||
/* Restore used registers */
|
||||
lw $s0, 0($sp)
|
||||
lw $s1, 4($sp)
|
||||
lw $s2, 8($sp)
|
||||
lw $s3, 12($sp)
|
||||
lw $s4, 16($sp)
|
||||
lw $s5, 20($sp)
|
||||
lw $s6, 24($sp)
|
||||
lw $s7, 28($sp)
|
||||
|
||||
/* Write NONCE_0 back to right location in state */
|
||||
sw NONCE_0, 48(STATE)
|
||||
|
||||
.Lchacha20_mips_end:
|
||||
addiu $sp, STACK_SIZE
|
||||
jr $ra
|
||||
|
||||
.Lchacha20_mips_no_full_block_aligned:
|
||||
/* Restore the offset on BYTES */
|
||||
addiu BYTES, CHACHA20_BLOCK_SIZE
|
||||
|
||||
/* Get number of full WORDS */
|
||||
andi $at, BYTES, MASK_U32
|
||||
|
||||
/* Load upper half of jump table addr */
|
||||
lui T0, %hi(.Lchacha20_mips_jmptbl_aligned_0)
|
||||
|
||||
/* Calculate lower half jump table offset */
|
||||
ins T0, $at, 1, 6
|
||||
|
||||
/* Add offset to STATE */
|
||||
addu T1, STATE, $at
|
||||
|
||||
/* Add lower half jump table addr */
|
||||
addiu T0, %lo(.Lchacha20_mips_jmptbl_aligned_0)
|
||||
|
||||
/* Read value from STATE */
|
||||
lw SAVED_CA, 0(T1)
|
||||
|
||||
/* Store remaining bytecounter as negative value */
|
||||
subu BYTES, $at, BYTES
|
||||
|
||||
jr T0
|
||||
|
||||
/* Jump table */
|
||||
FOR_EACH_WORD(JMPTBL_ALIGNED)
|
||||
|
||||
|
||||
.Loop_chacha20_unaligned:
|
||||
/* Set the number of rounds here to fill the delay slot. */
|
||||
li $at, 20
|
||||
|
||||
/* BYTES < 0, it has no full block. */
|
||||
bltz BYTES, .Lchacha20_mips_no_full_block_unaligned
|
||||
|
||||
FOR_EACH_WORD_REV(STORE_UNALIGNED)
|
||||
|
||||
/* BYTES > 0? Loop again. */
|
||||
bgtz BYTES, .Loop_chacha20_rounds
|
||||
|
||||
/* Write NONCE_0 back to right location in state */
|
||||
sw NONCE_0, 48(STATE)
|
||||
|
||||
.set noreorder
|
||||
/* Fall through to byte handling */
|
||||
bgez BYTES, .Lchacha20_mips_xor_done
|
||||
.Lchacha20_mips_xor_unaligned_0_b:
|
||||
.Lchacha20_mips_xor_aligned_0_b:
|
||||
/* Place this here to fill delay slot */
|
||||
addiu NONCE_0, 1
|
||||
.set reorder
|
||||
|
||||
.Lchacha20_mips_xor_bytes:
|
||||
addu IN, $at
|
||||
addu OUT, $at
|
||||
/* First byte */
|
||||
lbu T1, 0(IN)
|
||||
addiu $at, BYTES, 1
|
||||
CPU_TO_LE32(SAVED_X)
|
||||
ROTR(SAVED_X)
|
||||
xor T1, SAVED_X
|
||||
sb T1, 0(OUT)
|
||||
beqz $at, .Lchacha20_mips_xor_done
|
||||
/* Second byte */
|
||||
lbu T1, 1(IN)
|
||||
addiu $at, BYTES, 2
|
||||
ROTx SAVED_X, 8
|
||||
xor T1, SAVED_X
|
||||
sb T1, 1(OUT)
|
||||
beqz $at, .Lchacha20_mips_xor_done
|
||||
/* Third byte */
|
||||
lbu T1, 2(IN)
|
||||
ROTx SAVED_X, 8
|
||||
xor T1, SAVED_X
|
||||
sb T1, 2(OUT)
|
||||
b .Lchacha20_mips_xor_done
|
||||
|
||||
.Lchacha20_mips_no_full_block_unaligned:
|
||||
/* Restore the offset on BYTES */
|
||||
addiu BYTES, CHACHA20_BLOCK_SIZE
|
||||
|
||||
/* Get number of full WORDS */
|
||||
andi $at, BYTES, MASK_U32
|
||||
|
||||
/* Load upper half of jump table addr */
|
||||
lui T0, %hi(.Lchacha20_mips_jmptbl_unaligned_0)
|
||||
|
||||
/* Calculate lower half jump table offset */
|
||||
ins T0, $at, 1, 6
|
||||
|
||||
/* Add offset to STATE */
|
||||
addu T1, STATE, $at
|
||||
|
||||
/* Add lower half jump table addr */
|
||||
addiu T0, %lo(.Lchacha20_mips_jmptbl_unaligned_0)
|
||||
|
||||
/* Read value from STATE */
|
||||
lw SAVED_CA, 0(T1)
|
||||
|
||||
/* Store remaining bytecounter as negative value */
|
||||
subu BYTES, $at, BYTES
|
||||
|
||||
jr T0
|
||||
|
||||
/* Jump table */
|
||||
FOR_EACH_WORD(JMPTBL_UNALIGNED)
|
||||
.end chacha20_mips
|
||||
.set at
|
@ -0,0 +1,461 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Copyright (C) 2018 Google, Inc.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
|
||||
/*
|
||||
* Design notes:
|
||||
*
|
||||
* 16 registers would be needed to hold the state matrix, but only 14 are
|
||||
* available because 'sp' and 'pc' cannot be used. So we spill the elements
|
||||
* (x8, x9) to the stack and swap them out with (x10, x11). This adds one
|
||||
* 'ldrd' and one 'strd' instruction per round.
|
||||
*
|
||||
* All rotates are performed using the implicit rotate operand accepted by the
|
||||
* 'add' and 'eor' instructions. This is faster than using explicit rotate
|
||||
* instructions. To make this work, we allow the values in the second and last
|
||||
* rows of the ChaCha state matrix (rows 'b' and 'd') to temporarily have the
|
||||
* wrong rotation amount. The rotation amount is then fixed up just in time
|
||||
* when the values are used. 'brot' is the number of bits the values in row 'b'
|
||||
* need to be rotated right to arrive at the correct values, and 'drot'
|
||||
* similarly for row 'd'. (brot, drot) start out as (0, 0) but we make it such
|
||||
* that they end up as (25, 24) after every round.
|
||||
*/
|
||||
|
||||
// ChaCha state registers
|
||||
X0 .req r0
|
||||
X1 .req r1
|
||||
X2 .req r2
|
||||
X3 .req r3
|
||||
X4 .req r4
|
||||
X5 .req r5
|
||||
X6 .req r6
|
||||
X7 .req r7
|
||||
X8_X10 .req r8 // shared by x8 and x10
|
||||
X9_X11 .req r9 // shared by x9 and x11
|
||||
X12 .req r10
|
||||
X13 .req r11
|
||||
X14 .req r12
|
||||
X15 .req r14
|
||||
|
||||
.Lexpand_32byte_k:
|
||||
// "expand 32-byte k"
|
||||
.word 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
|
||||
|
||||
#ifdef __thumb2__
|
||||
# define adrl adr
|
||||
#endif
|
||||
|
||||
.macro __rev out, in, t0, t1, t2
|
||||
.if __LINUX_ARM_ARCH__ >= 6
|
||||
rev \out, \in
|
||||
.else
|
||||
lsl \t0, \in, #24
|
||||
and \t1, \in, #0xff00
|
||||
and \t2, \in, #0xff0000
|
||||
orr \out, \t0, \in, lsr #24
|
||||
orr \out, \out, \t1, lsl #8
|
||||
orr \out, \out, \t2, lsr #8
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro _le32_bswap x, t0, t1, t2
|
||||
#ifdef __ARMEB__
|
||||
__rev \x, \x, \t0, \t1, \t2
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro _le32_bswap_4x a, b, c, d, t0, t1, t2
|
||||
_le32_bswap \a, \t0, \t1, \t2
|
||||
_le32_bswap \b, \t0, \t1, \t2
|
||||
_le32_bswap \c, \t0, \t1, \t2
|
||||
_le32_bswap \d, \t0, \t1, \t2
|
||||
.endm
|
||||
|
||||
.macro __ldrd a, b, src, offset
|
||||
#if __LINUX_ARM_ARCH__ >= 6
|
||||
ldrd \a, \b, [\src, #\offset]
|
||||
#else
|
||||
ldr \a, [\src, #\offset]
|
||||
ldr \b, [\src, #\offset + 4]
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro __strd a, b, dst, offset
|
||||
#if __LINUX_ARM_ARCH__ >= 6
|
||||
strd \a, \b, [\dst, #\offset]
|
||||
#else
|
||||
str \a, [\dst, #\offset]
|
||||
str \b, [\dst, #\offset + 4]
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro _halfround a1, b1, c1, d1, a2, b2, c2, d2
|
||||
|
||||
// a += b; d ^= a; d = rol(d, 16);
|
||||
add \a1, \a1, \b1, ror #brot
|
||||
add \a2, \a2, \b2, ror #brot
|
||||
eor \d1, \a1, \d1, ror #drot
|
||||
eor \d2, \a2, \d2, ror #drot
|
||||
// drot == 32 - 16 == 16
|
||||
|
||||
// c += d; b ^= c; b = rol(b, 12);
|
||||
add \c1, \c1, \d1, ror #16
|
||||
add \c2, \c2, \d2, ror #16
|
||||
eor \b1, \c1, \b1, ror #brot
|
||||
eor \b2, \c2, \b2, ror #brot
|
||||
// brot == 32 - 12 == 20
|
||||
|
||||
// a += b; d ^= a; d = rol(d, 8);
|
||||
add \a1, \a1, \b1, ror #20
|
||||
add \a2, \a2, \b2, ror #20
|
||||
eor \d1, \a1, \d1, ror #16
|
||||
eor \d2, \a2, \d2, ror #16
|
||||
// drot == 32 - 8 == 24
|
||||
|
||||
// c += d; b ^= c; b = rol(b, 7);
|
||||
add \c1, \c1, \d1, ror #24
|
||||
add \c2, \c2, \d2, ror #24
|
||||
eor \b1, \c1, \b1, ror #20
|
||||
eor \b2, \c2, \b2, ror #20
|
||||
// brot == 32 - 7 == 25
|
||||
.endm
|
||||
|
||||
.macro _doubleround
|
||||
|
||||
// column round
|
||||
|
||||
// quarterrounds: (x0, x4, x8, x12) and (x1, x5, x9, x13)
|
||||
_halfround X0, X4, X8_X10, X12, X1, X5, X9_X11, X13
|
||||
|
||||
// save (x8, x9); restore (x10, x11)
|
||||
__strd X8_X10, X9_X11, sp, 0
|
||||
__ldrd X8_X10, X9_X11, sp, 8
|
||||
|
||||
// quarterrounds: (x2, x6, x10, x14) and (x3, x7, x11, x15)
|
||||
_halfround X2, X6, X8_X10, X14, X3, X7, X9_X11, X15
|
||||
|
||||
.set brot, 25
|
||||
.set drot, 24
|
||||
|
||||
// diagonal round
|
||||
|
||||
// quarterrounds: (x0, x5, x10, x15) and (x1, x6, x11, x12)
|
||||
_halfround X0, X5, X8_X10, X15, X1, X6, X9_X11, X12
|
||||
|
||||
// save (x10, x11); restore (x8, x9)
|
||||
__strd X8_X10, X9_X11, sp, 8
|
||||
__ldrd X8_X10, X9_X11, sp, 0
|
||||
|
||||
// quarterrounds: (x2, x7, x8, x13) and (x3, x4, x9, x14)
|
||||
_halfround X2, X7, X8_X10, X13, X3, X4, X9_X11, X14
|
||||
.endm
|
||||
|
||||
.macro _chacha_permute nrounds
|
||||
.set brot, 0
|
||||
.set drot, 0
|
||||
.rept \nrounds / 2
|
||||
_doubleround
|
||||
.endr
|
||||
.endm
|
||||
|
||||
.macro _chacha nrounds
|
||||
|
||||
.Lnext_block\@:
|
||||
// Stack: unused0-unused1 x10-x11 x0-x15 OUT IN LEN
|
||||
// Registers contain x0-x9,x12-x15.
|
||||
|
||||
// Do the core ChaCha permutation to update x0-x15.
|
||||
_chacha_permute \nrounds
|
||||
|
||||
add sp, #8
|
||||
// Stack: x10-x11 orig_x0-orig_x15 OUT IN LEN
|
||||
// Registers contain x0-x9,x12-x15.
|
||||
// x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
|
||||
|
||||
// Free up some registers (r8-r12,r14) by pushing (x8-x9,x12-x15).
|
||||
push {X8_X10, X9_X11, X12, X13, X14, X15}
|
||||
|
||||
// Load (OUT, IN, LEN).
|
||||
ldr r14, [sp, #96]
|
||||
ldr r12, [sp, #100]
|
||||
ldr r11, [sp, #104]
|
||||
|
||||
orr r10, r14, r12
|
||||
|
||||
// Use slow path if fewer than 64 bytes remain.
|
||||
cmp r11, #64
|
||||
blt .Lxor_slowpath\@
|
||||
|
||||
// Use slow path if IN and/or OUT isn't 4-byte aligned. Needed even on
|
||||
// ARMv6+, since ldmia and stmia (used below) still require alignment.
|
||||
tst r10, #3
|
||||
bne .Lxor_slowpath\@
|
||||
|
||||
// Fast path: XOR 64 bytes of aligned data.
|
||||
|
||||
// Stack: x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN
|
||||
// Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is OUT.
|
||||
// x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
|
||||
|
||||
// x0-x3
|
||||
__ldrd r8, r9, sp, 32
|
||||
__ldrd r10, r11, sp, 40
|
||||
add X0, X0, r8
|
||||
add X1, X1, r9
|
||||
add X2, X2, r10
|
||||
add X3, X3, r11
|
||||
_le32_bswap_4x X0, X1, X2, X3, r8, r9, r10
|
||||
ldmia r12!, {r8-r11}
|
||||
eor X0, X0, r8
|
||||
eor X1, X1, r9
|
||||
eor X2, X2, r10
|
||||
eor X3, X3, r11
|
||||
stmia r14!, {X0-X3}
|
||||
|
||||
// x4-x7
|
||||
__ldrd r8, r9, sp, 48
|
||||
__ldrd r10, r11, sp, 56
|
||||
add X4, r8, X4, ror #brot
|
||||
add X5, r9, X5, ror #brot
|
||||
ldmia r12!, {X0-X3}
|
||||
add X6, r10, X6, ror #brot
|
||||
add X7, r11, X7, ror #brot
|
||||
_le32_bswap_4x X4, X5, X6, X7, r8, r9, r10
|
||||
eor X4, X4, X0
|
||||
eor X5, X5, X1
|
||||
eor X6, X6, X2
|
||||
eor X7, X7, X3
|
||||
stmia r14!, {X4-X7}
|
||||
|
||||
// x8-x15
|
||||
pop {r0-r7} // (x8-x9,x12-x15,x10-x11)
|
||||
__ldrd r8, r9, sp, 32
|
||||
__ldrd r10, r11, sp, 40
|
||||
add r0, r0, r8 // x8
|
||||
add r1, r1, r9 // x9
|
||||
add r6, r6, r10 // x10
|
||||
add r7, r7, r11 // x11
|
||||
_le32_bswap_4x r0, r1, r6, r7, r8, r9, r10
|
||||
ldmia r12!, {r8-r11}
|
||||
eor r0, r0, r8 // x8
|
||||
eor r1, r1, r9 // x9
|
||||
eor r6, r6, r10 // x10
|
||||
eor r7, r7, r11 // x11
|
||||
stmia r14!, {r0,r1,r6,r7}
|
||||
ldmia r12!, {r0,r1,r6,r7}
|
||||
__ldrd r8, r9, sp, 48
|
||||
__ldrd r10, r11, sp, 56
|
||||
add r2, r8, r2, ror #drot // x12
|
||||
add r3, r9, r3, ror #drot // x13
|
||||
add r4, r10, r4, ror #drot // x14
|
||||
add r5, r11, r5, ror #drot // x15
|
||||
_le32_bswap_4x r2, r3, r4, r5, r9, r10, r11
|
||||
ldr r9, [sp, #72] // load LEN
|
||||
eor r2, r2, r0 // x12
|
||||
eor r3, r3, r1 // x13
|
||||
eor r4, r4, r6 // x14
|
||||
eor r5, r5, r7 // x15
|
||||
subs r9, #64 // decrement and check LEN
|
||||
stmia r14!, {r2-r5}
|
||||
|
||||
beq .Ldone\@
|
||||
|
||||
.Lprepare_for_next_block\@:
|
||||
|
||||
// Stack: x0-x15 OUT IN LEN
|
||||
|
||||
// Increment block counter (x12)
|
||||
add r8, #1
|
||||
|
||||
// Store updated (OUT, IN, LEN)
|
||||
str r14, [sp, #64]
|
||||
str r12, [sp, #68]
|
||||
str r9, [sp, #72]
|
||||
|
||||
mov r14, sp
|
||||
|
||||
// Store updated block counter (x12)
|
||||
str r8, [sp, #48]
|
||||
|
||||
sub sp, #16
|
||||
|
||||
// Reload state and do next block
|
||||
ldmia r14!, {r0-r11} // load x0-x11
|
||||
__strd r10, r11, sp, 8 // store x10-x11 before state
|
||||
ldmia r14, {r10-r12,r14} // load x12-x15
|
||||
b .Lnext_block\@
|
||||
|
||||
.Lxor_slowpath\@:
|
||||
// Slow path: < 64 bytes remaining, or unaligned input or output buffer.
|
||||
// We handle it by storing the 64 bytes of keystream to the stack, then
|
||||
// XOR-ing the needed portion with the data.
|
||||
|
||||
// Allocate keystream buffer
|
||||
sub sp, #64
|
||||
mov r14, sp
|
||||
|
||||
// Stack: ks0-ks15 x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN
|
||||
// Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is &ks0.
|
||||
// x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
|
||||
|
||||
// Save keystream for x0-x3
|
||||
__ldrd r8, r9, sp, 96
|
||||
__ldrd r10, r11, sp, 104
|
||||
add X0, X0, r8
|
||||
add X1, X1, r9
|
||||
add X2, X2, r10
|
||||
add X3, X3, r11
|
||||
_le32_bswap_4x X0, X1, X2, X3, r8, r9, r10
|
||||
stmia r14!, {X0-X3}
|
||||
|
||||
// Save keystream for x4-x7
|
||||
__ldrd r8, r9, sp, 112
|
||||
__ldrd r10, r11, sp, 120
|
||||
add X4, r8, X4, ror #brot
|
||||
add X5, r9, X5, ror #brot
|
||||
add X6, r10, X6, ror #brot
|
||||
add X7, r11, X7, ror #brot
|
||||
_le32_bswap_4x X4, X5, X6, X7, r8, r9, r10
|
||||
add r8, sp, #64
|
||||
stmia r14!, {X4-X7}
|
||||
|
||||
// Save keystream for x8-x15
|
||||
ldm r8, {r0-r7} // (x8-x9,x12-x15,x10-x11)
|
||||
__ldrd r8, r9, sp, 128
|
||||
__ldrd r10, r11, sp, 136
|
||||
add r0, r0, r8 // x8
|
||||
add r1, r1, r9 // x9
|
||||
add r6, r6, r10 // x10
|
||||
add r7, r7, r11 // x11
|
||||
_le32_bswap_4x r0, r1, r6, r7, r8, r9, r10
|
||||
stmia r14!, {r0,r1,r6,r7}
|
||||
__ldrd r8, r9, sp, 144
|
||||
__ldrd r10, r11, sp, 152
|
||||
add r2, r8, r2, ror #drot // x12
|
||||
add r3, r9, r3, ror #drot // x13
|
||||
add r4, r10, r4, ror #drot // x14
|
||||
add r5, r11, r5, ror #drot // x15
|
||||
_le32_bswap_4x r2, r3, r4, r5, r9, r10, r11
|
||||
stmia r14, {r2-r5}
|
||||
|
||||
// Stack: ks0-ks15 unused0-unused7 x0-x15 OUT IN LEN
|
||||
// Registers: r8 is block counter, r12 is IN.
|
||||
|
||||
ldr r9, [sp, #168] // LEN
|
||||
ldr r14, [sp, #160] // OUT
|
||||
cmp r9, #64
|
||||
mov r0, sp
|
||||
movle r1, r9
|
||||
movgt r1, #64
|
||||
// r1 is number of bytes to XOR, in range [1, 64]
|
||||
|
||||
.if __LINUX_ARM_ARCH__ < 6
|
||||
orr r2, r12, r14
|
||||
tst r2, #3 // IN or OUT misaligned?
|
||||
bne .Lxor_next_byte\@
|
||||
.endif
|
||||
|
||||
// XOR a word at a time
|
||||
.rept 16
|
||||
subs r1, #4
|
||||
blt .Lxor_words_done\@
|
||||
ldr r2, [r12], #4
|
||||
ldr r3, [r0], #4
|
||||
eor r2, r2, r3
|
||||
str r2, [r14], #4
|
||||
.endr
|
||||
b .Lxor_slowpath_done\@
|
||||
.Lxor_words_done\@:
|
||||
ands r1, r1, #3
|
||||
beq .Lxor_slowpath_done\@
|
||||
|
||||
// XOR a byte at a time
|
||||
.Lxor_next_byte\@:
|
||||
ldrb r2, [r12], #1
|
||||
ldrb r3, [r0], #1
|
||||
eor r2, r2, r3
|
||||
strb r2, [r14], #1
|
||||
subs r1, #1
|
||||
bne .Lxor_next_byte\@
|
||||
|
||||
.Lxor_slowpath_done\@:
|
||||
subs r9, #64
|
||||
add sp, #96
|
||||
bgt .Lprepare_for_next_block\@
|
||||
|
||||
.Ldone\@:
|
||||
.endm // _chacha
|
||||
|
||||
/*
|
||||
* void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8],
|
||||
* const u32 iv[4]);
|
||||
*/
|
||||
// chacha20_arm: scalar ARM ChaCha20 entry point.
// On entry r0 = out, r1 = in, r2 = len, r3 = key; 'iv' is read from the
// caller's stack.  Builds the 16-word state on the stack in the layout the
// _chacha macro expects, runs 20 rounds per block, then unwinds the frame.
SYM_FUNC_START(chacha20_arm)
|
||||
cmp r2, #0 // len == 0?
|
||||
reteq lr
|
||||
|
||||
// Saves OUT/IN/LEN (r0-r2) for _chacha plus r4-r11 and lr: 12 words = 48
// bytes, which is why the stacked 'iv' argument is found at [sp, #48] below.
push {r0-r2,r4-r11,lr}
|
||||
|
||||
// Push state x0-x15 onto stack.
|
||||
// Also store an extra copy of x10-x11 just before the state.
|
||||
|
||||
ldr r4, [sp, #48] // iv
|
||||
// r0 marks the top of the state area (just below the saved OUT/IN/LEN);
// the state is filled downwards from it with stmdb.
mov r0, sp
|
||||
// 80 bytes = 8 unused + 8 for the x10-x11 copy + 64 for x0-x15.
sub sp, #80
|
||||
|
||||
// iv: x12-x15
|
||||
ldm r4, {X12,X13,X14,X15}
|
||||
stmdb r0!, {X12,X13,X14,X15}
|
||||
|
||||
// key: x4-x11
|
||||
// Key words 6-7 (byte offset 24) are x10-x11; they pass through the shared
// r8/r9 registers, which are then reloaded with x8-x9 by the ldm below.
__ldrd X8_X10, X9_X11, r3, 24
|
||||
__strd X8_X10, X9_X11, sp, 8
|
||||
stmdb r0!, {X8_X10, X9_X11}
|
||||
ldm r3, {X4-X9_X11}
|
||||
stmdb r0!, {X4-X9_X11}
|
||||
|
||||
// constants: x0-x3
|
||||
adrl X3, .Lexpand_32byte_k
|
||||
ldm X3, {X0-X3}
|
||||
__strd X0, X1, sp, 16
|
||||
__strd X2, X3, sp, 24
|
||||
|
||||
_chacha 20
|
||||
|
||||
// Drop the 64-byte state and the 12 bytes of saved OUT/IN/LEN (76 total),
// leaving the saved r4-r11 and return address to be popped.
add sp, #76
|
||||
pop {r4-r11, pc}
|
||||
SYM_FUNC_END(chacha20_arm)
|
||||
|
||||
/*
|
||||
* void hchacha20_arm(const u32 state[16], u32 out[8]);
|
||||
*/
|
||||
// hchacha20_arm: run the 20-round ChaCha permutation on the 16-word state at
// r0 and store words x0-x3 and x12-x15 of the result to the buffer at r1.
SYM_FUNC_START(hchacha20_arm)
|
||||
// r1 ('out') is pushed together with the saved registers so that it can be
// popped back into r4 once the permutation has finished.
push {r1,r4-r11,lr}
|
||||
|
||||
// r0 is about to be overwritten with x0, so keep the state pointer in r14.
mov r14, r0
|
||||
ldmia r14!, {r0-r11} // load x0-x11
|
||||
push {r10-r11} // store x10-x11 to stack
|
||||
ldm r14, {r10-r12,r14} // load x12-x15
|
||||
// Reserve the 8-byte slot at [sp, #0] that _doubleround uses to spill x8-x9;
// the x10-x11 copy pushed above is then at [sp, #8].
sub sp, #8
|
||||
|
||||
_chacha_permute 20
|
||||
|
||||
// Skip over (unused0-unused1, x10-x11)
|
||||
add sp, #16
|
||||
|
||||
// Fix up rotations of x12-x15
|
||||
ror X12, X12, #drot
|
||||
ror X13, X13, #drot
|
||||
pop {r4} // load 'out'
|
||||
ror X14, X14, #drot
|
||||
ror X15, X15, #drot
|
||||
|
||||
// Store (x0-x3,x12-x15) to 'out'
|
||||
stm r4, {X0,X1,X2,X3,X12,X13,X14,X15}
|
||||
|
||||
pop {r4-r11,pc}
|
||||
SYM_FUNC_END(hchacha20_arm)
|
132
sys/dev/if_wg/module/crypto/zinc/chacha20/chacha20-x86_64-glue.c
Normal file
132
sys/dev/if_wg/module/crypto/zinc/chacha20/chacha20-x86_64-glue.c
Normal file
@ -0,0 +1,132 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR MIT
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*/
|
||||
#ifdef __linux__
|
||||
#include <asm/fpu/api.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/intel-family.h>
|
||||
#else
|
||||
#include <sys/simd-x86_64.h>
|
||||
#endif
|
||||
|
||||
asmlinkage void hchacha20_ssse3(u32 *derived_key, const u8 *nonce,
|
||||
const u8 *key);
|
||||
asmlinkage void chacha20_ssse3(u8 *out, const u8 *in, const size_t len,
|
||||
const u32 key[8], const u32 counter[4]);
|
||||
asmlinkage void chacha20_avx2(u8 *out, const u8 *in, const size_t len,
|
||||
const u32 key[8], const u32 counter[4]);
|
||||
asmlinkage void chacha20_avx512(u8 *out, const u8 *in, const size_t len,
|
||||
const u32 key[8], const u32 counter[4]);
|
||||
asmlinkage void chacha20_avx512vl(u8 *out, const u8 *in, const size_t len,
|
||||
const u32 key[8], const u32 counter[4]);
|
||||
|
||||
static bool chacha20_use_ssse3 __ro_after_init;
|
||||
static bool chacha20_use_avx2 __ro_after_init;
|
||||
static bool chacha20_use_avx512 __ro_after_init;
|
||||
static bool chacha20_use_avx512vl __ro_after_init;
|
||||
static bool *const chacha20_nobs[] __initconst = {
|
||||
&chacha20_use_ssse3, &chacha20_use_avx2, &chacha20_use_avx512,
|
||||
&chacha20_use_avx512vl };
|
||||
|
||||
/*
 * Probe the CPU once and record which accelerated x86_64 ChaCha20 routines
 * may be selected later: SSSE3, AVX2, AVX-512 (zmm) and AVX-512VL.
 *
 * Linux builds use boot_cpu_has()/cpu_has_xfeatures(); all other builds use
 * the cpu_feature2 / cpu_stdext_feature words together with __ymm_enabled()
 * and __zmm_enabled().  The && chains are kept in their original order so the
 * enable-state helpers are only called once the CPUID bits are present.
 * With bootverbose set, the four resulting flags are printed.
 */
static void __init chacha20_fpu_init(void)
{
#ifdef __linux__
	/* CPUID prerequisites common to every AVX-family variant. */
	const bool avx_and_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
				  boot_cpu_has(X86_FEATURE_AVX2);

	chacha20_use_ssse3 = boot_cpu_has(X86_FEATURE_SSSE3);
	chacha20_use_avx2 = avx_and_avx2 &&
		cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
#ifndef COMPAT_CANNOT_USE_AVX512
	chacha20_use_avx512 = avx_and_avx2 &&
		boot_cpu_has(X86_FEATURE_AVX512F) &&
		cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
				  XFEATURE_MASK_AVX512, NULL) &&
		/* Skylake-X downclocks unacceptably when zmm is in use. */
		boot_cpu_data.x86_model != INTEL_FAM6_SKYLAKE_X;
	chacha20_use_avx512vl = avx_and_avx2 &&
		boot_cpu_has(X86_FEATURE_AVX512F) &&
		boot_cpu_has(X86_FEATURE_AVX512VL) &&
		cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
				  XFEATURE_MASK_AVX512, NULL);
#endif
#else
	chacha20_use_ssse3 = (cpu_feature2 & CPUID2_SSSE3) != 0;
	chacha20_use_avx2 = (cpu_feature2 & CPUID2_AVX) != 0 &&
	    (cpu_stdext_feature & CPUID_STDEXT_AVX2) != 0 &&
	    __ymm_enabled();
	chacha20_use_avx512 = chacha20_use_avx2 &&
	    (cpu_stdext_feature & CPUID_STDEXT_AVX512F) != 0 &&
	    __zmm_enabled();
	/* AVX512F is already implied by chacha20_use_avx512 being set. */
	chacha20_use_avx512vl = chacha20_use_avx512 &&
	    (cpu_stdext_feature & CPUID_STDEXT_AVX512VL) != 0;
#endif
	if (bootverbose) {
		printf("ssse3: %d avx2: %d avx512: %d avx512vl: %d\n",
		    chacha20_use_ssse3, chacha20_use_avx2,
		    chacha20_use_avx512, chacha20_use_avx512vl);
	}
}
|
||||
|
||||
static inline bool chacha20_arch(struct chacha20_ctx *ctx, u8 *dst,
|
||||
const u8 *src, size_t len,
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
/* SIMD disables preemption, so relax after processing each page. */
|
||||
BUILD_BUG_ON(PAGE_SIZE < CHACHA20_BLOCK_SIZE ||
|
||||
PAGE_SIZE % CHACHA20_BLOCK_SIZE);
|
||||
|
||||
if (!chacha20_use_ssse3) {
|
||||
return false;
|
||||
}
|
||||
if (len <= CHACHA20_BLOCK_SIZE) {
|
||||
return false;
|
||||
}
|
||||
if (!simd_use(simd_context)) {
|
||||
return false;
|
||||
}
|
||||
for (;;) {
|
||||
const size_t bytes = min_t(size_t, len, PAGE_SIZE);
|
||||
|
||||
if (chacha20_use_avx512 &&
|
||||
len >= CHACHA20_BLOCK_SIZE * 8)
|
||||
chacha20_avx512(dst, src, bytes, ctx->key, ctx->counter);
|
||||
else if (chacha20_use_avx512vl &&
|
||||
len >= CHACHA20_BLOCK_SIZE * 4)
|
||||
chacha20_avx512vl(dst, src, bytes, ctx->key, ctx->counter);
|
||||
else if (chacha20_use_avx2 &&
|
||||
len >= CHACHA20_BLOCK_SIZE * 4)
|
||||
chacha20_avx2(dst, src, bytes, ctx->key, ctx->counter);
|
||||
else
|
||||
chacha20_ssse3(dst, src, bytes, ctx->key, ctx->counter);
|
||||
ctx->counter[0] += (bytes + 63) / 64;
|
||||
len -= bytes;
|
||||
if (!len)
|
||||
break;
|
||||
dst += bytes;
|
||||
src += bytes;
|
||||
simd_relax(simd_context);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool hchacha20_arch(u32 derived_key[CHACHA20_KEY_WORDS],
|
||||
const u8 nonce[HCHACHA20_NONCE_SIZE],
|
||||
const u8 key[HCHACHA20_KEY_SIZE],
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_AS_SSSE3) && chacha20_use_ssse3 &&
|
||||
simd_use(simd_context)) {
|
||||
hchacha20_ssse3(derived_key, nonce, key);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
4106
sys/dev/if_wg/module/crypto/zinc/chacha20/chacha20-x86_64.pl
Executable file
4106
sys/dev/if_wg/module/crypto/zinc/chacha20/chacha20-x86_64.pl
Executable file
File diff suppressed because it is too large
Load Diff
238
sys/dev/if_wg/module/crypto/zinc/chacha20/chacha20.c
Normal file
238
sys/dev/if_wg/module/crypto/zinc/chacha20/chacha20.c
Normal file
@ -0,0 +1,238 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR MIT
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*
|
||||
* Implementation of the ChaCha20 stream cipher.
|
||||
*
|
||||
* Information: https://cr.yp.to/chacha.html
|
||||
*/
|
||||
|
||||
#include <zinc/chacha20.h>
|
||||
#include "../selftest/run.h"
|
||||
#define IS_ENABLED_CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1
|
||||
|
||||
#define IS_ENABLED_CONFIG_64BIT (sizeof(void*) == 8)
|
||||
|
||||
void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int len)
|
||||
{
|
||||
int relalign = 0;
|
||||
|
||||
if (!IS_ENABLED_CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) {
|
||||
int size = sizeof(unsigned long);
|
||||
int d = (((unsigned long)dst ^ (unsigned long)src1) |
|
||||
((unsigned long)dst ^ (unsigned long)src2)) &
|
||||
(size - 1);
|
||||
|
||||
relalign = d ? 1 << ffs(d) : size;
|
||||
|
||||
/*
|
||||
* If we care about alignment, process as many bytes as
|
||||
* needed to advance dst and src to values whose alignments
|
||||
* equal their relative alignment. This will allow us to
|
||||
* process the remainder of the input using optimal strides.
|
||||
*/
|
||||
while (((unsigned long)dst & (relalign - 1)) && len > 0) {
|
||||
*dst++ = *src1++ ^ *src2++;
|
||||
len--;
|
||||
}
|
||||
}
|
||||
|
||||
while (IS_ENABLED(CONFIG_64BIT) && len >= 8 && !(relalign & 7)) {
|
||||
*(u64 *)dst = *(const u64 *)src1 ^ *(const u64 *)src2;
|
||||
dst += 8;
|
||||
src1 += 8;
|
||||
src2 += 8;
|
||||
len -= 8;
|
||||
}
|
||||
|
||||
while (len >= 4 && !(relalign & 3)) {
|
||||
*(u32 *)dst = *(const u32 *)src1 ^ *(const u32 *)src2;
|
||||
dst += 4;
|
||||
src1 += 4;
|
||||
src2 += 4;
|
||||
len -= 4;
|
||||
}
|
||||
|
||||
while (len >= 2 && !(relalign & 1)) {
|
||||
*(u16 *)dst = *(const u16 *)src1 ^ *(const u16 *)src2;
|
||||
dst += 2;
|
||||
src1 += 2;
|
||||
src2 += 2;
|
||||
len -= 2;
|
||||
}
|
||||
|
||||
while (len--)
|
||||
*dst++ = *src1++ ^ *src2++;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_ZINC_ARCH_X86_64)
|
||||
#include "chacha20-x86_64-glue.c"
|
||||
#elif defined(CONFIG_ZINC_ARCH_ARM) || defined(CONFIG_ZINC_ARCH_ARM64)
|
||||
#include "chacha20-arm-glue.c"
|
||||
#elif defined(CONFIG_ZINC_ARCH_MIPS)
|
||||
#include "chacha20-mips-glue.c"
|
||||
#else
|
||||
static bool *const chacha20_nobs[] __initconst = { };
|
||||
static void __init chacha20_fpu_init(void)
|
||||
{
|
||||
}
|
||||
static inline bool chacha20_arch(struct chacha20_ctx *ctx, u8 *dst,
|
||||
const u8 *src, size_t len,
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline bool hchacha20_arch(u32 derived_key[CHACHA20_KEY_WORDS],
|
||||
const u8 nonce[HCHACHA20_NONCE_SIZE],
|
||||
const u8 key[HCHACHA20_KEY_SIZE],
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * One ChaCha quarter round on words a, b, c and d of the state array x:
 * four add/xor/rotate steps with rotation counts 16, 12, 8 and 7.
 * Written as a single comma expression so that rounds can be chained
 * inside larger expressions.
 */
#define QUARTER_ROUND(x, a, b, c, d) (		\
	x[a] += x[b],				\
	x[d] ^= x[a],				\
	x[d] = rol32(x[d], 16),			\
	x[c] += x[d],				\
	x[b] ^= x[c],				\
	x[b] = rol32(x[b], 12),			\
	x[a] += x[b],				\
	x[d] ^= x[a],				\
	x[d] = rol32(x[d], 8),			\
	x[c] += x[d],				\
	x[b] ^= x[c],				\
	x[b] = rol32(x[b], 7)			\
)

/* Flat index of row i, column j in the 4x4 word matrix. */
#define C(i, j) ((i) * 4 + (j))

/* A column round followed by a diagonal round (eight quarter rounds). */
#define DOUBLE_ROUND(x) (						\
	/* Column Round */						\
	QUARTER_ROUND(x, C(0, 0), C(1, 0), C(2, 0), C(3, 0)),		\
	QUARTER_ROUND(x, C(0, 1), C(1, 1), C(2, 1), C(3, 1)),		\
	QUARTER_ROUND(x, C(0, 2), C(1, 2), C(2, 2), C(3, 2)),		\
	QUARTER_ROUND(x, C(0, 3), C(1, 3), C(2, 3), C(3, 3)),		\
	/* Diagonal Round */						\
	QUARTER_ROUND(x, C(0, 0), C(1, 1), C(2, 2), C(3, 3)),		\
	QUARTER_ROUND(x, C(0, 1), C(1, 2), C(2, 3), C(3, 0)),		\
	QUARTER_ROUND(x, C(0, 2), C(1, 3), C(2, 0), C(3, 1)),		\
	QUARTER_ROUND(x, C(0, 3), C(1, 0), C(2, 1), C(3, 2))		\
)

/* Ten double rounds, i.e. the twenty rounds of ChaCha20. */
#define TWENTY_ROUNDS(x) (	\
	DOUBLE_ROUND(x),	\
	DOUBLE_ROUND(x),	\
	DOUBLE_ROUND(x),	\
	DOUBLE_ROUND(x),	\
	DOUBLE_ROUND(x),	\
	DOUBLE_ROUND(x),	\
	DOUBLE_ROUND(x),	\
	DOUBLE_ROUND(x),	\
	DOUBLE_ROUND(x),	\
	DOUBLE_ROUND(x)		\
)
|
||||
|
||||
/*
 * Produce one block of keystream from the current state.
 *
 * @ctx:    cipher state; ctx->counter[0] is incremented by one on return
 * @stream: receives CHACHA20_BLOCK_WORDS little-endian keystream words
 *
 * The state is copied, run through twenty rounds, and the original state is
 * added back word by word before the result is stored.
 */
static void chacha20_block_generic(struct chacha20_ctx *ctx, __le32 *stream)
{
	u32 work[CHACHA20_BLOCK_WORDS];
	size_t w;

	for (w = 0; w < ARRAY_SIZE(work); w++)
		work[w] = ctx->state[w];

	TWENTY_ROUNDS(work);

	for (w = 0; w < ARRAY_SIZE(work); w++)
		stream[w] = cpu_to_le32(work[w] + ctx->state[w]);

	++ctx->counter[0];
}
|
||||
|
||||
static void chacha20_generic(struct chacha20_ctx *ctx, u8 *out, const u8 *in,
|
||||
u32 len)
|
||||
{
|
||||
__le32 buf[CHACHA20_BLOCK_WORDS];
|
||||
|
||||
while (len >= CHACHA20_BLOCK_SIZE) {
|
||||
chacha20_block_generic(ctx, buf);
|
||||
crypto_xor_cpy(out, in, (u8 *)buf, CHACHA20_BLOCK_SIZE);
|
||||
len -= CHACHA20_BLOCK_SIZE;
|
||||
out += CHACHA20_BLOCK_SIZE;
|
||||
in += CHACHA20_BLOCK_SIZE;
|
||||
}
|
||||
if (len) {
|
||||
chacha20_block_generic(ctx, buf);
|
||||
crypto_xor_cpy(out, in, (u8 *)buf, len);
|
||||
}
|
||||
}
|
||||
|
||||
void chacha20(struct chacha20_ctx *ctx, u8 *dst, const u8 *src, u32 len,
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
if (!chacha20_arch(ctx, dst, src, len, simd_context))
|
||||
chacha20_generic(ctx, dst, src, len);
|
||||
}
|
||||
EXPORT_SYMBOL(chacha20);
|
||||
|
||||
static void hchacha20_generic(u32 derived_key[CHACHA20_KEY_WORDS],
|
||||
const u8 nonce[HCHACHA20_NONCE_SIZE],
|
||||
const u8 key[HCHACHA20_KEY_SIZE])
|
||||
{
|
||||
u32 x[] = { CHACHA20_CONSTANT_EXPA,
|
||||
CHACHA20_CONSTANT_ND_3,
|
||||
CHACHA20_CONSTANT_2_BY,
|
||||
CHACHA20_CONSTANT_TE_K,
|
||||
get_unaligned_le32(key + 0),
|
||||
get_unaligned_le32(key + 4),
|
||||
get_unaligned_le32(key + 8),
|
||||
get_unaligned_le32(key + 12),
|
||||
get_unaligned_le32(key + 16),
|
||||
get_unaligned_le32(key + 20),
|
||||
get_unaligned_le32(key + 24),
|
||||
get_unaligned_le32(key + 28),
|
||||
get_unaligned_le32(nonce + 0),
|
||||
get_unaligned_le32(nonce + 4),
|
||||
get_unaligned_le32(nonce + 8),
|
||||
get_unaligned_le32(nonce + 12)
|
||||
};
|
||||
|
||||
TWENTY_ROUNDS(x);
|
||||
|
||||
memcpy(derived_key + 0, x + 0, sizeof(u32) * 4);
|
||||
memcpy(derived_key + 4, x + 12, sizeof(u32) * 4);
|
||||
}
|
||||
|
||||
/* Derived key should be 32-bit aligned */
|
||||
void hchacha20(u32 derived_key[CHACHA20_KEY_WORDS],
|
||||
const u8 nonce[HCHACHA20_NONCE_SIZE],
|
||||
const u8 key[HCHACHA20_KEY_SIZE], simd_context_t *simd_context)
|
||||
{
|
||||
if (!hchacha20_arch(derived_key, nonce, key, simd_context))
|
||||
hchacha20_generic(derived_key, nonce, key);
|
||||
}
|
||||
EXPORT_SYMBOL(hchacha20);
|
||||
|
||||
#include "../selftest/chacha20.c"
|
||||
|
||||
static bool nosimd __initdata = false;
|
||||
|
||||
#ifndef COMPAT_ZINC_IS_A_MODULE
|
||||
int __init chacha20_mod_init(void)
|
||||
#else
|
||||
static int __init mod_init(void)
|
||||
#endif
|
||||
{
|
||||
if (!nosimd)
|
||||
chacha20_fpu_init();
|
||||
if (!selftest_run("chacha20", chacha20_selftest, chacha20_nobs,
|
||||
ARRAY_SIZE(chacha20_nobs)))
|
||||
return -ENOTRECOVERABLE;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef COMPAT_ZINC_IS_A_MODULE
/* Nothing to tear down; present only so the module can be unloaded. */
static void __exit mod_exit(void)
{
}

/* Register the init and exit hooks for the stand-alone module build. */
module_init(mod_init);
module_exit(mod_exit);
#endif
|
196
sys/dev/if_wg/module/crypto/zinc/chacha20poly1305.c
Normal file
196
sys/dev/if_wg/module/crypto/zinc/chacha20poly1305.c
Normal file
@ -0,0 +1,196 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR MIT
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*
|
||||
* This is an implementation of the ChaCha20Poly1305 AEAD construction.
|
||||
*
|
||||
* Information: https://tools.ietf.org/html/rfc8439
|
||||
*/
|
||||
|
||||
#include <sys/support.h>
|
||||
#include <zinc/chacha20poly1305.h>
|
||||
#include <zinc/chacha20.h>
|
||||
#include <zinc/poly1305.h>
|
||||
#include "selftest/run.h"
|
||||
|
||||
static const u8 pad0[CHACHA20_BLOCK_SIZE] = { 0 };
|
||||
|
||||
static inline void
|
||||
__chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
|
||||
const u8 *ad, const size_t ad_len, const u64 nonce,
|
||||
const u8 key[CHACHA20POLY1305_KEY_SIZE],
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
struct poly1305_ctx poly1305_state;
|
||||
struct chacha20_ctx chacha20_state;
|
||||
union {
|
||||
u8 block0[POLY1305_KEY_SIZE];
|
||||
__le64 lens[2];
|
||||
} b = { { 0 } };
|
||||
|
||||
chacha20_init(&chacha20_state, key, nonce);
|
||||
chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0),
|
||||
simd_context);
|
||||
poly1305_init(&poly1305_state, b.block0);
|
||||
|
||||
poly1305_update(&poly1305_state, ad, ad_len, simd_context);
|
||||
poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf,
|
||||
simd_context);
|
||||
|
||||
chacha20(&chacha20_state, dst, src, src_len, simd_context);
|
||||
|
||||
poly1305_update(&poly1305_state, dst, src_len, simd_context);
|
||||
poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf,
|
||||
simd_context);
|
||||
|
||||
b.lens[0] = cpu_to_le64(ad_len);
|
||||
b.lens[1] = cpu_to_le64(src_len);
|
||||
poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens),
|
||||
simd_context);
|
||||
|
||||
poly1305_final(&poly1305_state, dst + src_len, simd_context);
|
||||
|
||||
memzero_explicit(&chacha20_state, sizeof(chacha20_state));
|
||||
memzero_explicit(&b, sizeof(b));
|
||||
}
|
||||
|
||||
void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
|
||||
const u8 *ad, const size_t ad_len,
|
||||
const u64 nonce,
|
||||
const u8 key[CHACHA20POLY1305_KEY_SIZE])
|
||||
{
|
||||
simd_context_t simd_context;
|
||||
|
||||
simd_get(&simd_context);
|
||||
__chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, nonce, key,
|
||||
&simd_context);
|
||||
simd_put(&simd_context);
|
||||
}
|
||||
EXPORT_SYMBOL(chacha20poly1305_encrypt);
|
||||
static inline bool
|
||||
__chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
|
||||
const u8 *ad, const size_t ad_len, const u64 nonce,
|
||||
const u8 key[CHACHA20POLY1305_KEY_SIZE],
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
struct poly1305_ctx poly1305_state;
|
||||
struct chacha20_ctx chacha20_state;
|
||||
int ret;
|
||||
size_t dst_len;
|
||||
union {
|
||||
u8 block0[POLY1305_KEY_SIZE];
|
||||
u8 mac[POLY1305_MAC_SIZE];
|
||||
__le64 lens[2];
|
||||
} b = { { 0 } };
|
||||
|
||||
if (unlikely(src_len < POLY1305_MAC_SIZE)) {
|
||||
printf("src_len too short\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
chacha20_init(&chacha20_state, key, nonce);
|
||||
chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0),
|
||||
simd_context);
|
||||
poly1305_init(&poly1305_state, b.block0);
|
||||
|
||||
poly1305_update(&poly1305_state, ad, ad_len, simd_context);
|
||||
poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf,
|
||||
simd_context);
|
||||
|
||||
dst_len = src_len - POLY1305_MAC_SIZE;
|
||||
poly1305_update(&poly1305_state, src, dst_len, simd_context);
|
||||
poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf,
|
||||
simd_context);
|
||||
|
||||
b.lens[0] = cpu_to_le64(ad_len);
|
||||
b.lens[1] = cpu_to_le64(dst_len);
|
||||
poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens),
|
||||
simd_context);
|
||||
|
||||
poly1305_final(&poly1305_state, b.mac, simd_context);
|
||||
|
||||
ret = crypto_memneq(b.mac, src + dst_len, POLY1305_MAC_SIZE);
|
||||
if (likely(!ret))
|
||||
chacha20(&chacha20_state, dst, src, dst_len, simd_context);
|
||||
else {
|
||||
printf("calculated: %16D\n", b.mac, "");
|
||||
printf("sent : %16D\n", src + dst_len, "");
|
||||
}
|
||||
memzero_explicit(&chacha20_state, sizeof(chacha20_state));
|
||||
memzero_explicit(&b, sizeof(b));
|
||||
|
||||
return !ret;
|
||||
}
|
||||
|
||||
bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
|
||||
const u8 *ad, const size_t ad_len,
|
||||
const u64 nonce,
|
||||
const u8 key[CHACHA20POLY1305_KEY_SIZE])
|
||||
{
|
||||
simd_context_t simd_context;
|
||||
bool ret;
|
||||
|
||||
simd_get(&simd_context);
|
||||
ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, nonce,
|
||||
key, &simd_context);
|
||||
simd_put(&simd_context);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(chacha20poly1305_decrypt);
|
||||
|
||||
void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
|
||||
const u8 *ad, const size_t ad_len,
|
||||
const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
|
||||
const u8 key[CHACHA20POLY1305_KEY_SIZE])
|
||||
{
|
||||
simd_context_t simd_context;
|
||||
u32 derived_key[CHACHA20_KEY_WORDS] __aligned(16);
|
||||
|
||||
simd_get(&simd_context);
|
||||
hchacha20(derived_key, nonce, key, &simd_context);
|
||||
cpu_to_le32_array(derived_key, ARRAY_SIZE(derived_key));
|
||||
__chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len,
|
||||
get_unaligned_le64(nonce + 16),
|
||||
(u8 *)derived_key, &simd_context);
|
||||
memzero_explicit(derived_key, CHACHA20POLY1305_KEY_SIZE);
|
||||
simd_put(&simd_context);
|
||||
}
|
||||
EXPORT_SYMBOL(xchacha20poly1305_encrypt);
|
||||
|
||||
bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
|
||||
const u8 *ad, const size_t ad_len,
|
||||
const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
|
||||
const u8 key[CHACHA20POLY1305_KEY_SIZE])
|
||||
{
|
||||
bool ret;
|
||||
simd_context_t simd_context;
|
||||
u32 derived_key[CHACHA20_KEY_WORDS] __aligned(16);
|
||||
|
||||
simd_get(&simd_context);
|
||||
hchacha20(derived_key, nonce, key, &simd_context);
|
||||
cpu_to_le32_array(derived_key, ARRAY_SIZE(derived_key));
|
||||
ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len,
|
||||
get_unaligned_le64(nonce + 16),
|
||||
(u8 *)derived_key, &simd_context);
|
||||
memzero_explicit(derived_key, CHACHA20POLY1305_KEY_SIZE);
|
||||
simd_put(&simd_context);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(xchacha20poly1305_decrypt);
|
||||
|
||||
#include "selftest/chacha20poly1305.c"
|
||||
|
||||
static int __init mod_init(void)
|
||||
{
|
||||
if (!selftest_run("chacha20poly1305", chacha20poly1305_selftest,
|
||||
NULL, 0))
|
||||
return -ENOTRECOVERABLE;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __exit mod_exit(void)
|
||||
{
|
||||
}
|
||||
|
||||
module_init(mod_init);
|
||||
module_exit(mod_exit);
|
140
sys/dev/if_wg/module/crypto/zinc/poly1305/poly1305-arm-glue.c
Normal file
140
sys/dev/if_wg/module/crypto/zinc/poly1305/poly1305-arm-glue.c
Normal file
@ -0,0 +1,140 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR MIT
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#include <asm/hwcap.h>
|
||||
#include <asm/neon.h>
|
||||
|
||||
asmlinkage void poly1305_init_arm(void *ctx, const u8 key[16]);
|
||||
asmlinkage void poly1305_blocks_arm(void *ctx, const u8 *inp, const size_t len,
|
||||
const u32 padbit);
|
||||
asmlinkage void poly1305_emit_arm(void *ctx, u8 mac[16], const u32 nonce[4]);
|
||||
asmlinkage void poly1305_blocks_neon(void *ctx, const u8 *inp, const size_t len,
|
||||
const u32 padbit);
|
||||
asmlinkage void poly1305_emit_neon(void *ctx, u8 mac[16], const u32 nonce[4]);
|
||||
|
||||
static bool poly1305_use_neon __ro_after_init;
|
||||
static bool *const poly1305_nobs[] __initconst = { &poly1305_use_neon };
|
||||
|
||||
/*
 * Record whether the NEON code path may be used: ASIMD on arm64, the
 * HWCAP_NEON capability bit on 32-bit ARM.  When neither architecture macro
 * is defined the flag is left untouched.
 */
static void __init poly1305_fpu_init(void)
{
#ifdef CONFIG_ZINC_ARCH_ARM64
	poly1305_use_neon = cpu_have_named_feature(ASIMD);
#elif defined(CONFIG_ZINC_ARCH_ARM)
	poly1305_use_neon = elf_hwcap & HWCAP_NEON;
#endif
}
|
||||
|
||||
/*
 * C view of the state the assembly routines keep behind the opaque ctx
 * pointer.  The field order and sizes must match the assembly exactly; do
 * not reorder or resize anything here.
 *
 * The accumulator is stored either as five 26-bit limbs (h[0..4], written
 * by the NEON code) or as h0/h1/h2 in the scalar code's base; is_base2_26
 * is non-zero while the 26-bit representation is current.
 */
#if defined(CONFIG_ZINC_ARCH_ARM64)
struct poly1305_arch_internal {
	union {
		u32 h[5];		/* accumulator, base 2^26 */
		struct {
			u64 h0, h1, h2;	/* accumulator, base 2^64 */
		};
	};
	u64 is_base2_26;		/* which representation is live */
	u64 r[2];			/* clamped key */
};
#elif defined(CONFIG_ZINC_ARCH_ARM)
struct poly1305_arch_internal {
	union {
		u32 h[5];		/* accumulator, base 2^26 */
		struct {
			u64 h0, h1;
			u32 h2;
		} __packed;		/* accumulator, scalar base */
	};
	u32 r[4];			/* clamped key */
	u32 is_base2_26;		/* which representation is live */
};
#endif
|
||||
|
||||
/* The NEON code uses base 2^26, while the scalar code uses base 2^64 on 64-bit
|
||||
* and base 2^32 on 32-bit. If we hit the unfortunate situation of using NEON
|
||||
* and then having to go back to scalar -- because the user is silly and has
|
||||
* called the update function from two separate contexts -- then we need to
|
||||
* convert back to the original base before proceeding. The below function is
|
||||
* written for 64-bit integers, and so we have to swap words at the end on
|
||||
* big-endian 32-bit. It is possible to reason that the initial reduction below
|
||||
* is sufficient given the implementation invariants. However, for an avoidance
|
||||
* of doubt and because this is not performance critical, we do the full
|
||||
* reduction anyway.
|
||||
*/
|
||||
static void convert_to_base2_64(void *ctx)
|
||||
{
|
||||
struct poly1305_arch_internal *state = ctx;
|
||||
u32 cy;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !state->is_base2_26)
|
||||
return;
|
||||
|
||||
cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy;
|
||||
cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy;
|
||||
cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy;
|
||||
cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy;
|
||||
state->h0 = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0];
|
||||
state->h1 = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12);
|
||||
state->h2 = state->h[4] >> 24;
|
||||
if (IS_ENABLED(CONFIG_ZINC_ARCH_ARM) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) {
|
||||
state->h0 = rol64(state->h0, 32);
|
||||
state->h1 = rol64(state->h1, 32);
|
||||
}
|
||||
#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
|
||||
cy = (state->h2 >> 2) + (state->h2 & ~3ULL);
|
||||
state->h2 &= 3;
|
||||
state->h0 += cy;
|
||||
state->h1 += (cy = ULT(state->h0, cy));
|
||||
state->h2 += ULT(state->h1, cy);
|
||||
#undef ULT
|
||||
state->is_base2_26 = 0;
|
||||
}
|
||||
|
||||
static inline bool poly1305_init_arch(void *ctx,
|
||||
const u8 key[POLY1305_KEY_SIZE])
|
||||
{
|
||||
poly1305_init_arm(ctx, key);
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool poly1305_blocks_arch(void *ctx, const u8 *inp,
|
||||
size_t len, const u32 padbit,
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
/* SIMD disables preemption, so relax after processing each page. */
|
||||
BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE ||
|
||||
PAGE_SIZE % POLY1305_BLOCK_SIZE);
|
||||
|
||||
if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !poly1305_use_neon ||
|
||||
!simd_use(simd_context)) {
|
||||
convert_to_base2_64(ctx);
|
||||
poly1305_blocks_arm(ctx, inp, len, padbit);
|
||||
return true;
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
const size_t bytes = min_t(size_t, len, PAGE_SIZE);
|
||||
|
||||
poly1305_blocks_neon(ctx, inp, bytes, padbit);
|
||||
len -= bytes;
|
||||
if (!len)
|
||||
break;
|
||||
inp += bytes;
|
||||
simd_relax(simd_context);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
|
||||
const u32 nonce[4],
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !poly1305_use_neon ||
|
||||
!simd_use(simd_context)) {
|
||||
convert_to_base2_64(ctx);
|
||||
poly1305_emit_arm(ctx, mac, nonce);
|
||||
} else
|
||||
poly1305_emit_neon(ctx, mac, nonce);
|
||||
return true;
|
||||
}
|
1276
sys/dev/if_wg/module/crypto/zinc/poly1305/poly1305-arm.pl
Executable file
1276
sys/dev/if_wg/module/crypto/zinc/poly1305/poly1305-arm.pl
Executable file
File diff suppressed because it is too large
Load Diff
974
sys/dev/if_wg/module/crypto/zinc/poly1305/poly1305-arm64.pl
Executable file
974
sys/dev/if_wg/module/crypto/zinc/poly1305/poly1305-arm64.pl
Executable file
@ -0,0 +1,974 @@
|
||||
#!/usr/bin/env perl
|
||||
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
|
||||
#
|
||||
# This code is taken from the OpenSSL project but the author, Andy Polyakov,
|
||||
# has relicensed it under the licenses specified in the SPDX header above.
|
||||
# The original headers, including the original license headers, are
|
||||
# included below for completeness.
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||
# ====================================================================
|
||||
#
|
||||
# This module implements Poly1305 hash for ARMv8.
|
||||
#
|
||||
# June 2015
|
||||
#
|
||||
# Numbers are cycles per processed byte with poly1305_blocks alone.
|
||||
#
|
||||
# IALU/gcc-4.9 NEON
|
||||
#
|
||||
# Apple A7 1.86/+5% 0.72
|
||||
# Cortex-A53 2.69/+58% 1.47
|
||||
# Cortex-A57 2.70/+7% 1.14
|
||||
# Denver 1.64/+50% 1.18(*)
|
||||
# X-Gene 2.13/+68% 2.27
|
||||
# Mongoose 1.77/+75% 1.12
|
||||
# Kryo 2.70/+55% 1.13
|
||||
#
|
||||
# (*) estimate based on resources availability is less than 1.0,
|
||||
# i.e. measured result is worse than expected, presumably binary
|
||||
# translator is not almighty;
|
||||
|
||||
$flavour=shift;
|
||||
if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
|
||||
else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
|
||||
|
||||
if ($flavour && $flavour ne "void") {
|
||||
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
|
||||
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
|
||||
die "can't locate arm-xlate.pl";
|
||||
|
||||
open STDOUT,"| \"$^X\" $xlate $flavour $output";
|
||||
} else {
|
||||
open STDOUT,">$output";
|
||||
}
|
||||
|
||||
my ($ctx,$inp,$len,$padbit) = map("x$_",(0..3));
|
||||
my ($mac,$nonce)=($inp,$len);
|
||||
|
||||
my ($h0,$h1,$h2,$r0,$r1,$s1,$t0,$t1,$d0,$d1,$d2) = map("x$_",(4..14));
|
||||
|
||||
$code.=<<___;
|
||||
#ifndef __KERNEL__
|
||||
# include "arm_arch.h"
|
||||
.extern OPENSSL_armcap_P
|
||||
#else
|
||||
# define poly1305_init poly1305_init_arm
|
||||
# define poly1305_blocks poly1305_blocks_arm
|
||||
# define poly1305_emit poly1305_emit_arm
|
||||
#endif
|
||||
|
||||
.text
|
||||
|
||||
// forward "declarations" are required for Apple
|
||||
.globl poly1305_blocks
|
||||
.globl poly1305_emit
|
||||
.globl poly1305_init
|
||||
.type poly1305_init,%function
|
||||
.align 5
|
||||
poly1305_init:
|
||||
cmp $inp,xzr
|
||||
stp xzr,xzr,[$ctx] // zero hash value
|
||||
stp xzr,xzr,[$ctx,#16] // [along with is_base2_26]
|
||||
|
||||
csel x0,xzr,x0,eq
|
||||
b.eq .Lno_key
|
||||
|
||||
#ifndef __KERNEL__
|
||||
# ifdef __ILP32__
|
||||
ldrsw $t1,.LOPENSSL_armcap_P
|
||||
# else
|
||||
ldr $t1,.LOPENSSL_armcap_P
|
||||
# endif
|
||||
adr $t0,.LOPENSSL_armcap_P
|
||||
ldr w17,[$t0,$t1]
|
||||
#endif
|
||||
|
||||
ldp $r0,$r1,[$inp] // load key
|
||||
mov $s1,#0xfffffffc0fffffff
|
||||
movk $s1,#0x0fff,lsl#48
|
||||
#ifdef __AARCH64EB__
|
||||
rev $r0,$r0 // flip bytes
|
||||
rev $r1,$r1
|
||||
#endif
|
||||
and $r0,$r0,$s1 // &=0ffffffc0fffffff
|
||||
and $s1,$s1,#-4
|
||||
and $r1,$r1,$s1 // &=0ffffffc0ffffffc
|
||||
stp $r0,$r1,[$ctx,#32] // save key value
|
||||
|
||||
#ifndef __KERNEL__
|
||||
tst w17,#ARMV7_NEON
|
||||
|
||||
adr $d0,poly1305_blocks
|
||||
adr $r0,poly1305_blocks_neon
|
||||
adr $d1,poly1305_emit
|
||||
adr $r1,poly1305_emit_neon
|
||||
|
||||
csel $d0,$d0,$r0,eq
|
||||
csel $d1,$d1,$r1,eq
|
||||
|
||||
# ifdef __ILP32__
|
||||
stp w12,w13,[$len]
|
||||
# else
|
||||
stp $d0,$d1,[$len]
|
||||
# endif
|
||||
|
||||
mov x0,#1
|
||||
#else
|
||||
mov x0,#0
|
||||
#endif
|
||||
.Lno_key:
|
||||
ret
|
||||
.size poly1305_init,.-poly1305_init
|
||||
|
||||
.type poly1305_blocks,%function
|
||||
.align 5
|
||||
poly1305_blocks:
|
||||
ands $len,$len,#-16
|
||||
b.eq .Lno_data
|
||||
|
||||
ldp $h0,$h1,[$ctx] // load hash value
|
||||
ldp $r0,$r1,[$ctx,#32] // load key value
|
||||
ldr $h2,[$ctx,#16]
|
||||
add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
|
||||
b .Loop
|
||||
|
||||
.align 5
|
||||
.Loop:
|
||||
ldp $t0,$t1,[$inp],#16 // load input
|
||||
sub $len,$len,#16
|
||||
#ifdef __AARCH64EB__
|
||||
rev $t0,$t0
|
||||
rev $t1,$t1
|
||||
#endif
|
||||
adds $h0,$h0,$t0 // accumulate input
|
||||
adcs $h1,$h1,$t1
|
||||
|
||||
mul $d0,$h0,$r0 // h0*r0
|
||||
adc $h2,$h2,$padbit
|
||||
umulh $d1,$h0,$r0
|
||||
|
||||
mul $t0,$h1,$s1 // h1*5*r1
|
||||
umulh $t1,$h1,$s1
|
||||
|
||||
adds $d0,$d0,$t0
|
||||
mul $t0,$h0,$r1 // h0*r1
|
||||
adc $d1,$d1,$t1
|
||||
umulh $d2,$h0,$r1
|
||||
|
||||
adds $d1,$d1,$t0
|
||||
mul $t0,$h1,$r0 // h1*r0
|
||||
adc $d2,$d2,xzr
|
||||
umulh $t1,$h1,$r0
|
||||
|
||||
adds $d1,$d1,$t0
|
||||
mul $t0,$h2,$s1 // h2*5*r1
|
||||
adc $d2,$d2,$t1
|
||||
mul $t1,$h2,$r0 // h2*r0
|
||||
|
||||
adds $d1,$d1,$t0
|
||||
adc $d2,$d2,$t1
|
||||
|
||||
and $t0,$d2,#-4 // final reduction
|
||||
and $h2,$d2,#3
|
||||
add $t0,$t0,$d2,lsr#2
|
||||
adds $h0,$d0,$t0
|
||||
adcs $h1,$d1,xzr
|
||||
adc $h2,$h2,xzr
|
||||
|
||||
cbnz $len,.Loop
|
||||
|
||||
stp $h0,$h1,[$ctx] // store hash value
|
||||
str $h2,[$ctx,#16]
|
||||
|
||||
.Lno_data:
|
||||
ret
|
||||
.size poly1305_blocks,.-poly1305_blocks
|
||||
|
||||
.type poly1305_emit,%function
|
||||
.align 5
|
||||
poly1305_emit:
|
||||
ldp $h0,$h1,[$ctx] // load hash base 2^64
|
||||
ldr $h2,[$ctx,#16]
|
||||
ldp $t0,$t1,[$nonce] // load nonce
|
||||
|
||||
adds $d0,$h0,#5 // compare to modulus
|
||||
adcs $d1,$h1,xzr
|
||||
adc $d2,$h2,xzr
|
||||
|
||||
tst $d2,#-4 // see if it's carried/borrowed
|
||||
|
||||
csel $h0,$h0,$d0,eq
|
||||
csel $h1,$h1,$d1,eq
|
||||
|
||||
#ifdef __AARCH64EB__
|
||||
ror $t0,$t0,#32 // flip nonce words
|
||||
ror $t1,$t1,#32
|
||||
#endif
|
||||
adds $h0,$h0,$t0 // accumulate nonce
|
||||
adc $h1,$h1,$t1
|
||||
#ifdef __AARCH64EB__
|
||||
rev $h0,$h0 // flip output bytes
|
||||
rev $h1,$h1
|
||||
#endif
|
||||
stp $h0,$h1,[$mac] // write result
|
||||
|
||||
ret
|
||||
.size poly1305_emit,.-poly1305_emit
|
||||
___
|
||||
my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("v$_.4s",(0..8));
|
||||
my ($IN01_0,$IN01_1,$IN01_2,$IN01_3,$IN01_4) = map("v$_.2s",(9..13));
|
||||
my ($IN23_0,$IN23_1,$IN23_2,$IN23_3,$IN23_4) = map("v$_.2s",(14..18));
|
||||
my ($ACC0,$ACC1,$ACC2,$ACC3,$ACC4) = map("v$_.2d",(19..23));
|
||||
my ($H0,$H1,$H2,$H3,$H4) = map("v$_.2s",(24..28));
|
||||
my ($T0,$T1,$MASK) = map("v$_",(29..31));
|
||||
|
||||
my ($in2,$zeros)=("x16","x17");
|
||||
my $is_base2_26 = $zeros; # borrow
|
||||
|
||||
$code.=<<___;
|
||||
.type __poly1305_mult,%function
|
||||
.align 5
|
||||
__poly1305_mult:
|
||||
mul $d0,$h0,$r0 // h0*r0
|
||||
umulh $d1,$h0,$r0
|
||||
|
||||
mul $t0,$h1,$s1 // h1*5*r1
|
||||
umulh $t1,$h1,$s1
|
||||
|
||||
adds $d0,$d0,$t0
|
||||
mul $t0,$h0,$r1 // h0*r1
|
||||
adc $d1,$d1,$t1
|
||||
umulh $d2,$h0,$r1
|
||||
|
||||
adds $d1,$d1,$t0
|
||||
mul $t0,$h1,$r0 // h1*r0
|
||||
adc $d2,$d2,xzr
|
||||
umulh $t1,$h1,$r0
|
||||
|
||||
adds $d1,$d1,$t0
|
||||
mul $t0,$h2,$s1 // h2*5*r1
|
||||
adc $d2,$d2,$t1
|
||||
mul $t1,$h2,$r0 // h2*r0
|
||||
|
||||
adds $d1,$d1,$t0
|
||||
adc $d2,$d2,$t1
|
||||
|
||||
and $t0,$d2,#-4 // final reduction
|
||||
and $h2,$d2,#3
|
||||
add $t0,$t0,$d2,lsr#2
|
||||
adds $h0,$d0,$t0
|
||||
adcs $h1,$d1,xzr
|
||||
adc $h2,$h2,xzr
|
||||
|
||||
ret
|
||||
.size __poly1305_mult,.-__poly1305_mult
|
||||
|
||||
.type __poly1305_splat,%function
|
||||
.align 5
|
||||
__poly1305_splat:
|
||||
and x12,$h0,#0x03ffffff // base 2^64 -> base 2^26
|
||||
ubfx x13,$h0,#26,#26
|
||||
extr x14,$h1,$h0,#52
|
||||
and x14,x14,#0x03ffffff
|
||||
ubfx x15,$h1,#14,#26
|
||||
extr x16,$h2,$h1,#40
|
||||
|
||||
str w12,[$ctx,#16*0] // r0
|
||||
add w12,w13,w13,lsl#2 // r1*5
|
||||
str w13,[$ctx,#16*1] // r1
|
||||
add w13,w14,w14,lsl#2 // r2*5
|
||||
str w12,[$ctx,#16*2] // s1
|
||||
str w14,[$ctx,#16*3] // r2
|
||||
add w14,w15,w15,lsl#2 // r3*5
|
||||
str w13,[$ctx,#16*4] // s2
|
||||
str w15,[$ctx,#16*5] // r3
|
||||
add w15,w16,w16,lsl#2 // r4*5
|
||||
str w14,[$ctx,#16*6] // s3
|
||||
str w16,[$ctx,#16*7] // r4
|
||||
str w15,[$ctx,#16*8] // s4
|
||||
|
||||
ret
|
||||
.size __poly1305_splat,.-__poly1305_splat
|
||||
|
||||
#if !defined(__KERNEL__) || defined(CONFIG_KERNEL_MODE_NEON)
|
||||
#ifdef __KERNEL__
|
||||
.globl poly1305_blocks_neon
|
||||
.globl poly1305_emit_neon
|
||||
#endif
|
||||
|
||||
.type poly1305_blocks_neon,%function
|
||||
.align 5
|
||||
poly1305_blocks_neon:
|
||||
ldr $is_base2_26,[$ctx,#24]
|
||||
cmp $len,#128
|
||||
b.hs .Lblocks_neon
|
||||
cbz $is_base2_26,poly1305_blocks
|
||||
|
||||
.Lblocks_neon:
|
||||
stp x29,x30,[sp,#-80]!
|
||||
add x29,sp,#0
|
||||
|
||||
ands $len,$len,#-16
|
||||
b.eq .Lno_data_neon
|
||||
|
||||
cbz $is_base2_26,.Lbase2_64_neon
|
||||
|
||||
ldp w10,w11,[$ctx] // load hash value base 2^26
|
||||
ldp w12,w13,[$ctx,#8]
|
||||
ldr w14,[$ctx,#16]
|
||||
|
||||
tst $len,#31
|
||||
b.eq .Leven_neon
|
||||
|
||||
ldp $r0,$r1,[$ctx,#32] // load key value
|
||||
|
||||
add $h0,x10,x11,lsl#26 // base 2^26 -> base 2^64
|
||||
lsr $h1,x12,#12
|
||||
adds $h0,$h0,x12,lsl#52
|
||||
add $h1,$h1,x13,lsl#14
|
||||
adc $h1,$h1,xzr
|
||||
lsr $h2,x14,#24
|
||||
adds $h1,$h1,x14,lsl#40
|
||||
adc $d2,$h2,xzr // can be partially reduced...
|
||||
|
||||
ldp $d0,$d1,[$inp],#16 // load input
|
||||
sub $len,$len,#16
|
||||
add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
|
||||
|
||||
and $t0,$d2,#-4 // ... so reduce
|
||||
and $h2,$d2,#3
|
||||
add $t0,$t0,$d2,lsr#2
|
||||
adds $h0,$h0,$t0
|
||||
adcs $h1,$h1,xzr
|
||||
adc $h2,$h2,xzr
|
||||
|
||||
#ifdef __AARCH64EB__
|
||||
rev $d0,$d0
|
||||
rev $d1,$d1
|
||||
#endif
|
||||
adds $h0,$h0,$d0 // accumulate input
|
||||
adcs $h1,$h1,$d1
|
||||
adc $h2,$h2,$padbit
|
||||
|
||||
bl __poly1305_mult
|
||||
ldr x30,[sp,#8]
|
||||
|
||||
cbz $padbit,.Lstore_base2_64_neon
|
||||
|
||||
and x10,$h0,#0x03ffffff // base 2^64 -> base 2^26
|
||||
ubfx x11,$h0,#26,#26
|
||||
extr x12,$h1,$h0,#52
|
||||
and x12,x12,#0x03ffffff
|
||||
ubfx x13,$h1,#14,#26
|
||||
extr x14,$h2,$h1,#40
|
||||
|
||||
cbnz $len,.Leven_neon
|
||||
|
||||
stp w10,w11,[$ctx] // store hash value base 2^26
|
||||
stp w12,w13,[$ctx,#8]
|
||||
str w14,[$ctx,#16]
|
||||
b .Lno_data_neon
|
||||
|
||||
.align 4
|
||||
.Lstore_base2_64_neon:
|
||||
stp $h0,$h1,[$ctx] // store hash value base 2^64
|
||||
stp $h2,xzr,[$ctx,#16] // note that is_base2_26 is zeroed
|
||||
b .Lno_data_neon
|
||||
|
||||
.align 4
|
||||
.Lbase2_64_neon:
|
||||
ldp $r0,$r1,[$ctx,#32] // load key value
|
||||
|
||||
ldp $h0,$h1,[$ctx] // load hash value base 2^64
|
||||
ldr $h2,[$ctx,#16]
|
||||
|
||||
tst $len,#31
|
||||
b.eq .Linit_neon
|
||||
|
||||
ldp $d0,$d1,[$inp],#16 // load input
|
||||
sub $len,$len,#16
|
||||
add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
|
||||
#ifdef __AARCH64EB__
|
||||
rev $d0,$d0
|
||||
rev $d1,$d1
|
||||
#endif
|
||||
adds $h0,$h0,$d0 // accumulate input
|
||||
adcs $h1,$h1,$d1
|
||||
adc $h2,$h2,$padbit
|
||||
|
||||
bl __poly1305_mult
|
||||
|
||||
.Linit_neon:
|
||||
and x10,$h0,#0x03ffffff // base 2^64 -> base 2^26
|
||||
ubfx x11,$h0,#26,#26
|
||||
extr x12,$h1,$h0,#52
|
||||
and x12,x12,#0x03ffffff
|
||||
ubfx x13,$h1,#14,#26
|
||||
extr x14,$h2,$h1,#40
|
||||
|
||||
stp d8,d9,[sp,#16] // meet ABI requirements
|
||||
stp d10,d11,[sp,#32]
|
||||
stp d12,d13,[sp,#48]
|
||||
stp d14,d15,[sp,#64]
|
||||
|
||||
fmov ${H0},x10
|
||||
fmov ${H1},x11
|
||||
fmov ${H2},x12
|
||||
fmov ${H3},x13
|
||||
fmov ${H4},x14
|
||||
|
||||
////////////////////////////////// initialize r^n table
|
||||
mov $h0,$r0 // r^1
|
||||
add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
|
||||
mov $h1,$r1
|
||||
mov $h2,xzr
|
||||
add $ctx,$ctx,#48+12
|
||||
bl __poly1305_splat
|
||||
|
||||
bl __poly1305_mult // r^2
|
||||
sub $ctx,$ctx,#4
|
||||
bl __poly1305_splat
|
||||
|
||||
bl __poly1305_mult // r^3
|
||||
sub $ctx,$ctx,#4
|
||||
bl __poly1305_splat
|
||||
|
||||
bl __poly1305_mult // r^4
|
||||
sub $ctx,$ctx,#4
|
||||
bl __poly1305_splat
|
||||
ldr x30,[sp,#8]
|
||||
|
||||
add $in2,$inp,#32
|
||||
adr $zeros,.Lzeros
|
||||
subs $len,$len,#64
|
||||
csel $in2,$zeros,$in2,lo
|
||||
|
||||
mov x4,#1
|
||||
str x4,[$ctx,#-24] // set is_base2_26
|
||||
sub $ctx,$ctx,#48 // restore original $ctx
|
||||
b .Ldo_neon
|
||||
|
||||
.align 4
|
||||
.Leven_neon:
|
||||
add $in2,$inp,#32
|
||||
adr $zeros,.Lzeros
|
||||
subs $len,$len,#64
|
||||
csel $in2,$zeros,$in2,lo
|
||||
|
||||
stp d8,d9,[sp,#16] // meet ABI requirements
|
||||
stp d10,d11,[sp,#32]
|
||||
stp d12,d13,[sp,#48]
|
||||
stp d14,d15,[sp,#64]
|
||||
|
||||
fmov ${H0},x10
|
||||
fmov ${H1},x11
|
||||
fmov ${H2},x12
|
||||
fmov ${H3},x13
|
||||
fmov ${H4},x14
|
||||
|
||||
.Ldo_neon:
|
||||
ldp x8,x12,[$in2],#16 // inp[2:3] (or zero)
|
||||
ldp x9,x13,[$in2],#48
|
||||
|
||||
lsl $padbit,$padbit,#24
|
||||
add x15,$ctx,#48
|
||||
|
||||
#ifdef __AARCH64EB__
|
||||
rev x8,x8
|
||||
rev x12,x12
|
||||
rev x9,x9
|
||||
rev x13,x13
|
||||
#endif
|
||||
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
|
||||
and x5,x9,#0x03ffffff
|
||||
ubfx x6,x8,#26,#26
|
||||
ubfx x7,x9,#26,#26
|
||||
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
|
||||
extr x8,x12,x8,#52
|
||||
extr x9,x13,x9,#52
|
||||
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
|
||||
fmov $IN23_0,x4
|
||||
and x8,x8,#0x03ffffff
|
||||
and x9,x9,#0x03ffffff
|
||||
ubfx x10,x12,#14,#26
|
||||
ubfx x11,x13,#14,#26
|
||||
add x12,$padbit,x12,lsr#40
|
||||
add x13,$padbit,x13,lsr#40
|
||||
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
|
||||
fmov $IN23_1,x6
|
||||
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
|
||||
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
|
||||
fmov $IN23_2,x8
|
||||
fmov $IN23_3,x10
|
||||
fmov $IN23_4,x12
|
||||
|
||||
ldp x8,x12,[$inp],#16 // inp[0:1]
|
||||
ldp x9,x13,[$inp],#48
|
||||
|
||||
ld1 {$R0,$R1,$S1,$R2},[x15],#64
|
||||
ld1 {$S2,$R3,$S3,$R4},[x15],#64
|
||||
ld1 {$S4},[x15]
|
||||
|
||||
#ifdef __AARCH64EB__
|
||||
rev x8,x8
|
||||
rev x12,x12
|
||||
rev x9,x9
|
||||
rev x13,x13
|
||||
#endif
|
||||
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
|
||||
and x5,x9,#0x03ffffff
|
||||
ubfx x6,x8,#26,#26
|
||||
ubfx x7,x9,#26,#26
|
||||
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
|
||||
extr x8,x12,x8,#52
|
||||
extr x9,x13,x9,#52
|
||||
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
|
||||
fmov $IN01_0,x4
|
||||
and x8,x8,#0x03ffffff
|
||||
and x9,x9,#0x03ffffff
|
||||
ubfx x10,x12,#14,#26
|
||||
ubfx x11,x13,#14,#26
|
||||
add x12,$padbit,x12,lsr#40
|
||||
add x13,$padbit,x13,lsr#40
|
||||
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
|
||||
fmov $IN01_1,x6
|
||||
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
|
||||
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
|
||||
movi $MASK.2d,#-1
|
||||
fmov $IN01_2,x8
|
||||
fmov $IN01_3,x10
|
||||
fmov $IN01_4,x12
|
||||
ushr $MASK.2d,$MASK.2d,#38
|
||||
|
||||
b.ls .Lskip_loop
|
||||
|
||||
.align 4
|
||||
.Loop_neon:
|
||||
////////////////////////////////////////////////////////////////
|
||||
// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
|
||||
// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
|
||||
// \___________________/
|
||||
// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
|
||||
// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
|
||||
// \___________________/ \____________________/
|
||||
//
|
||||
// Note that we start with inp[2:3]*r^2. This is because it
|
||||
// doesn't depend on reduction in previous iteration.
|
||||
////////////////////////////////////////////////////////////////
|
||||
// d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0
|
||||
// d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4
|
||||
// d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3
|
||||
// d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2
|
||||
// d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1
|
||||
|
||||
subs $len,$len,#64
|
||||
umull $ACC4,$IN23_0,${R4}[2]
|
||||
csel $in2,$zeros,$in2,lo
|
||||
umull $ACC3,$IN23_0,${R3}[2]
|
||||
umull $ACC2,$IN23_0,${R2}[2]
|
||||
ldp x8,x12,[$in2],#16 // inp[2:3] (or zero)
|
||||
umull $ACC1,$IN23_0,${R1}[2]
|
||||
ldp x9,x13,[$in2],#48
|
||||
umull $ACC0,$IN23_0,${R0}[2]
|
||||
#ifdef __AARCH64EB__
|
||||
rev x8,x8
|
||||
rev x12,x12
|
||||
rev x9,x9
|
||||
rev x13,x13
|
||||
#endif
|
||||
|
||||
umlal $ACC4,$IN23_1,${R3}[2]
|
||||
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
|
||||
umlal $ACC3,$IN23_1,${R2}[2]
|
||||
and x5,x9,#0x03ffffff
|
||||
umlal $ACC2,$IN23_1,${R1}[2]
|
||||
ubfx x6,x8,#26,#26
|
||||
umlal $ACC1,$IN23_1,${R0}[2]
|
||||
ubfx x7,x9,#26,#26
|
||||
umlal $ACC0,$IN23_1,${S4}[2]
|
||||
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
|
||||
|
||||
umlal $ACC4,$IN23_2,${R2}[2]
|
||||
extr x8,x12,x8,#52
|
||||
umlal $ACC3,$IN23_2,${R1}[2]
|
||||
extr x9,x13,x9,#52
|
||||
umlal $ACC2,$IN23_2,${R0}[2]
|
||||
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
|
||||
umlal $ACC1,$IN23_2,${S4}[2]
|
||||
fmov $IN23_0,x4
|
||||
umlal $ACC0,$IN23_2,${S3}[2]
|
||||
and x8,x8,#0x03ffffff
|
||||
|
||||
umlal $ACC4,$IN23_3,${R1}[2]
|
||||
and x9,x9,#0x03ffffff
|
||||
umlal $ACC3,$IN23_3,${R0}[2]
|
||||
ubfx x10,x12,#14,#26
|
||||
umlal $ACC2,$IN23_3,${S4}[2]
|
||||
ubfx x11,x13,#14,#26
|
||||
umlal $ACC1,$IN23_3,${S3}[2]
|
||||
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
|
||||
umlal $ACC0,$IN23_3,${S2}[2]
|
||||
fmov $IN23_1,x6
|
||||
|
||||
add $IN01_2,$IN01_2,$H2
|
||||
add x12,$padbit,x12,lsr#40
|
||||
umlal $ACC4,$IN23_4,${R0}[2]
|
||||
add x13,$padbit,x13,lsr#40
|
||||
umlal $ACC3,$IN23_4,${S4}[2]
|
||||
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
|
||||
umlal $ACC2,$IN23_4,${S3}[2]
|
||||
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
|
||||
umlal $ACC1,$IN23_4,${S2}[2]
|
||||
fmov $IN23_2,x8
|
||||
umlal $ACC0,$IN23_4,${S1}[2]
|
||||
fmov $IN23_3,x10
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// (hash+inp[0:1])*r^4 and accumulate
|
||||
|
||||
add $IN01_0,$IN01_0,$H0
|
||||
fmov $IN23_4,x12
|
||||
umlal $ACC3,$IN01_2,${R1}[0]
|
||||
ldp x8,x12,[$inp],#16 // inp[0:1]
|
||||
umlal $ACC0,$IN01_2,${S3}[0]
|
||||
ldp x9,x13,[$inp],#48
|
||||
umlal $ACC4,$IN01_2,${R2}[0]
|
||||
umlal $ACC1,$IN01_2,${S4}[0]
|
||||
umlal $ACC2,$IN01_2,${R0}[0]
|
||||
#ifdef __AARCH64EB__
|
||||
rev x8,x8
|
||||
rev x12,x12
|
||||
rev x9,x9
|
||||
rev x13,x13
|
||||
#endif
|
||||
|
||||
add $IN01_1,$IN01_1,$H1
|
||||
umlal $ACC3,$IN01_0,${R3}[0]
|
||||
umlal $ACC4,$IN01_0,${R4}[0]
|
||||
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
|
||||
umlal $ACC2,$IN01_0,${R2}[0]
|
||||
and x5,x9,#0x03ffffff
|
||||
umlal $ACC0,$IN01_0,${R0}[0]
|
||||
ubfx x6,x8,#26,#26
|
||||
umlal $ACC1,$IN01_0,${R1}[0]
|
||||
ubfx x7,x9,#26,#26
|
||||
|
||||
add $IN01_3,$IN01_3,$H3
|
||||
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
|
||||
umlal $ACC3,$IN01_1,${R2}[0]
|
||||
extr x8,x12,x8,#52
|
||||
umlal $ACC4,$IN01_1,${R3}[0]
|
||||
extr x9,x13,x9,#52
|
||||
umlal $ACC0,$IN01_1,${S4}[0]
|
||||
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
|
||||
umlal $ACC2,$IN01_1,${R1}[0]
|
||||
fmov $IN01_0,x4
|
||||
umlal $ACC1,$IN01_1,${R0}[0]
|
||||
and x8,x8,#0x03ffffff
|
||||
|
||||
add $IN01_4,$IN01_4,$H4
|
||||
and x9,x9,#0x03ffffff
|
||||
umlal $ACC3,$IN01_3,${R0}[0]
|
||||
ubfx x10,x12,#14,#26
|
||||
umlal $ACC0,$IN01_3,${S2}[0]
|
||||
ubfx x11,x13,#14,#26
|
||||
umlal $ACC4,$IN01_3,${R1}[0]
|
||||
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
|
||||
umlal $ACC1,$IN01_3,${S3}[0]
|
||||
fmov $IN01_1,x6
|
||||
umlal $ACC2,$IN01_3,${S4}[0]
|
||||
add x12,$padbit,x12,lsr#40
|
||||
|
||||
umlal $ACC3,$IN01_4,${S4}[0]
|
||||
add x13,$padbit,x13,lsr#40
|
||||
umlal $ACC0,$IN01_4,${S1}[0]
|
||||
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
|
||||
umlal $ACC4,$IN01_4,${R0}[0]
|
||||
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
|
||||
umlal $ACC1,$IN01_4,${S2}[0]
|
||||
fmov $IN01_2,x8
|
||||
umlal $ACC2,$IN01_4,${S3}[0]
|
||||
fmov $IN01_3,x10
|
||||
fmov $IN01_4,x12
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
|
||||
// and P. Schwabe
|
||||
//
|
||||
// [see discussion in poly1305-armv4 module]
|
||||
|
||||
ushr $T0.2d,$ACC3,#26
|
||||
xtn $H3,$ACC3
|
||||
ushr $T1.2d,$ACC0,#26
|
||||
and $ACC0,$ACC0,$MASK.2d
|
||||
add $ACC4,$ACC4,$T0.2d // h3 -> h4
|
||||
bic $H3,#0xfc,lsl#24 // &=0x03ffffff
|
||||
add $ACC1,$ACC1,$T1.2d // h0 -> h1
|
||||
|
||||
ushr $T0.2d,$ACC4,#26
|
||||
xtn $H4,$ACC4
|
||||
ushr $T1.2d,$ACC1,#26
|
||||
xtn $H1,$ACC1
|
||||
bic $H4,#0xfc,lsl#24
|
||||
add $ACC2,$ACC2,$T1.2d // h1 -> h2
|
||||
|
||||
add $ACC0,$ACC0,$T0.2d
|
||||
shl $T0.2d,$T0.2d,#2
|
||||
shrn $T1.2s,$ACC2,#26
|
||||
xtn $H2,$ACC2
|
||||
add $ACC0,$ACC0,$T0.2d // h4 -> h0
|
||||
bic $H1,#0xfc,lsl#24
|
||||
add $H3,$H3,$T1.2s // h2 -> h3
|
||||
bic $H2,#0xfc,lsl#24
|
||||
|
||||
shrn $T0.2s,$ACC0,#26
|
||||
xtn $H0,$ACC0
|
||||
ushr $T1.2s,$H3,#26
|
||||
bic $H3,#0xfc,lsl#24
|
||||
bic $H0,#0xfc,lsl#24
|
||||
add $H1,$H1,$T0.2s // h0 -> h1
|
||||
add $H4,$H4,$T1.2s // h3 -> h4
|
||||
|
||||
b.hi .Loop_neon
|
||||
|
||||
.Lskip_loop:
|
||||
dup $IN23_2,${IN23_2}[0]
|
||||
add $IN01_2,$IN01_2,$H2
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
|
||||
|
||||
adds $len,$len,#32
|
||||
b.ne .Long_tail
|
||||
|
||||
dup $IN23_2,${IN01_2}[0]
|
||||
add $IN23_0,$IN01_0,$H0
|
||||
add $IN23_3,$IN01_3,$H3
|
||||
add $IN23_1,$IN01_1,$H1
|
||||
add $IN23_4,$IN01_4,$H4
|
||||
|
||||
.Long_tail:
|
||||
dup $IN23_0,${IN23_0}[0]
|
||||
umull2 $ACC0,$IN23_2,${S3}
|
||||
umull2 $ACC3,$IN23_2,${R1}
|
||||
umull2 $ACC4,$IN23_2,${R2}
|
||||
umull2 $ACC2,$IN23_2,${R0}
|
||||
umull2 $ACC1,$IN23_2,${S4}
|
||||
|
||||
dup $IN23_1,${IN23_1}[0]
|
||||
umlal2 $ACC0,$IN23_0,${R0}
|
||||
umlal2 $ACC2,$IN23_0,${R2}
|
||||
umlal2 $ACC3,$IN23_0,${R3}
|
||||
umlal2 $ACC4,$IN23_0,${R4}
|
||||
umlal2 $ACC1,$IN23_0,${R1}
|
||||
|
||||
dup $IN23_3,${IN23_3}[0]
|
||||
umlal2 $ACC0,$IN23_1,${S4}
|
||||
umlal2 $ACC3,$IN23_1,${R2}
|
||||
umlal2 $ACC2,$IN23_1,${R1}
|
||||
umlal2 $ACC4,$IN23_1,${R3}
|
||||
umlal2 $ACC1,$IN23_1,${R0}
|
||||
|
||||
dup $IN23_4,${IN23_4}[0]
|
||||
umlal2 $ACC3,$IN23_3,${R0}
|
||||
umlal2 $ACC4,$IN23_3,${R1}
|
||||
umlal2 $ACC0,$IN23_3,${S2}
|
||||
umlal2 $ACC1,$IN23_3,${S3}
|
||||
umlal2 $ACC2,$IN23_3,${S4}
|
||||
|
||||
umlal2 $ACC3,$IN23_4,${S4}
|
||||
umlal2 $ACC0,$IN23_4,${S1}
|
||||
umlal2 $ACC4,$IN23_4,${R0}
|
||||
umlal2 $ACC1,$IN23_4,${S2}
|
||||
umlal2 $ACC2,$IN23_4,${S3}
|
||||
|
||||
b.eq .Lshort_tail
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// (hash+inp[0:1])*r^4:r^3 and accumulate
|
||||
|
||||
add $IN01_0,$IN01_0,$H0
|
||||
umlal $ACC3,$IN01_2,${R1}
|
||||
umlal $ACC0,$IN01_2,${S3}
|
||||
umlal $ACC4,$IN01_2,${R2}
|
||||
umlal $ACC1,$IN01_2,${S4}
|
||||
umlal $ACC2,$IN01_2,${R0}
|
||||
|
||||
add $IN01_1,$IN01_1,$H1
|
||||
umlal $ACC3,$IN01_0,${R3}
|
||||
umlal $ACC0,$IN01_0,${R0}
|
||||
umlal $ACC4,$IN01_0,${R4}
|
||||
umlal $ACC1,$IN01_0,${R1}
|
||||
umlal $ACC2,$IN01_0,${R2}
|
||||
|
||||
add $IN01_3,$IN01_3,$H3
|
||||
umlal $ACC3,$IN01_1,${R2}
|
||||
umlal $ACC0,$IN01_1,${S4}
|
||||
umlal $ACC4,$IN01_1,${R3}
|
||||
umlal $ACC1,$IN01_1,${R0}
|
||||
umlal $ACC2,$IN01_1,${R1}
|
||||
|
||||
add $IN01_4,$IN01_4,$H4
|
||||
umlal $ACC3,$IN01_3,${R0}
|
||||
umlal $ACC0,$IN01_3,${S2}
|
||||
umlal $ACC4,$IN01_3,${R1}
|
||||
umlal $ACC1,$IN01_3,${S3}
|
||||
umlal $ACC2,$IN01_3,${S4}
|
||||
|
||||
umlal $ACC3,$IN01_4,${S4}
|
||||
umlal $ACC0,$IN01_4,${S1}
|
||||
umlal $ACC4,$IN01_4,${R0}
|
||||
umlal $ACC1,$IN01_4,${S2}
|
||||
umlal $ACC2,$IN01_4,${S3}
|
||||
|
||||
.Lshort_tail:
|
||||
////////////////////////////////////////////////////////////////
|
||||
// horizontal add
|
||||
|
||||
addp $ACC3,$ACC3,$ACC3
|
||||
ldp d8,d9,[sp,#16] // meet ABI requirements
|
||||
addp $ACC0,$ACC0,$ACC0
|
||||
ldp d10,d11,[sp,#32]
|
||||
addp $ACC4,$ACC4,$ACC4
|
||||
ldp d12,d13,[sp,#48]
|
||||
addp $ACC1,$ACC1,$ACC1
|
||||
ldp d14,d15,[sp,#64]
|
||||
addp $ACC2,$ACC2,$ACC2
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// lazy reduction, but without narrowing
|
||||
|
||||
ushr $T0.2d,$ACC3,#26
|
||||
and $ACC3,$ACC3,$MASK.2d
|
||||
ushr $T1.2d,$ACC0,#26
|
||||
and $ACC0,$ACC0,$MASK.2d
|
||||
|
||||
add $ACC4,$ACC4,$T0.2d // h3 -> h4
|
||||
add $ACC1,$ACC1,$T1.2d // h0 -> h1
|
||||
|
||||
ushr $T0.2d,$ACC4,#26
|
||||
and $ACC4,$ACC4,$MASK.2d
|
||||
ushr $T1.2d,$ACC1,#26
|
||||
and $ACC1,$ACC1,$MASK.2d
|
||||
add $ACC2,$ACC2,$T1.2d // h1 -> h2
|
||||
|
||||
add $ACC0,$ACC0,$T0.2d
|
||||
shl $T0.2d,$T0.2d,#2
|
||||
ushr $T1.2d,$ACC2,#26
|
||||
and $ACC2,$ACC2,$MASK.2d
|
||||
add $ACC0,$ACC0,$T0.2d // h4 -> h0
|
||||
add $ACC3,$ACC3,$T1.2d // h2 -> h3
|
||||
|
||||
ushr $T0.2d,$ACC0,#26
|
||||
and $ACC0,$ACC0,$MASK.2d
|
||||
ushr $T1.2d,$ACC3,#26
|
||||
and $ACC3,$ACC3,$MASK.2d
|
||||
add $ACC1,$ACC1,$T0.2d // h0 -> h1
|
||||
add $ACC4,$ACC4,$T1.2d // h3 -> h4
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// write the result, can be partially reduced
|
||||
|
||||
st4 {$ACC0,$ACC1,$ACC2,$ACC3}[0],[$ctx],#16
|
||||
st1 {$ACC4}[0],[$ctx]
|
||||
|
||||
.Lno_data_neon:
|
||||
ldr x29,[sp],#80
|
||||
ret
|
||||
.size poly1305_blocks_neon,.-poly1305_blocks_neon
|
||||
|
||||
.type poly1305_emit_neon,%function
|
||||
.align 5
|
||||
poly1305_emit_neon:
|
||||
ldr $is_base2_26,[$ctx,#24]
|
||||
cbz $is_base2_26,poly1305_emit
|
||||
|
||||
ldp w10,w11,[$ctx] // load hash value base 2^26
|
||||
ldp w12,w13,[$ctx,#8]
|
||||
ldr w14,[$ctx,#16]
|
||||
|
||||
add $h0,x10,x11,lsl#26 // base 2^26 -> base 2^64
|
||||
lsr $h1,x12,#12
|
||||
adds $h0,$h0,x12,lsl#52
|
||||
add $h1,$h1,x13,lsl#14
|
||||
adc $h1,$h1,xzr
|
||||
lsr $h2,x14,#24
|
||||
adds $h1,$h1,x14,lsl#40
|
||||
adc $h2,$h2,xzr // can be partially reduced...
|
||||
|
||||
ldp $t0,$t1,[$nonce] // load nonce
|
||||
|
||||
and $d0,$h2,#-4 // ... so reduce
|
||||
add $d0,$d0,$h2,lsr#2
|
||||
and $h2,$h2,#3
|
||||
adds $h0,$h0,$d0
|
||||
adcs $h1,$h1,xzr
|
||||
adc $h2,$h2,xzr
|
||||
|
||||
adds $d0,$h0,#5 // compare to modulus
|
||||
adcs $d1,$h1,xzr
|
||||
adc $d2,$h2,xzr
|
||||
|
||||
tst $d2,#-4 // see if it's carried/borrowed
|
||||
|
||||
csel $h0,$h0,$d0,eq
|
||||
csel $h1,$h1,$d1,eq
|
||||
|
||||
#ifdef __AARCH64EB__
|
||||
ror $t0,$t0,#32 // flip nonce words
|
||||
ror $t1,$t1,#32
|
||||
#endif
|
||||
adds $h0,$h0,$t0 // accumulate nonce
|
||||
adc $h1,$h1,$t1
|
||||
#ifdef __AARCH64EB__
|
||||
rev $h0,$h0 // flip output bytes
|
||||
rev $h1,$h1
|
||||
#endif
|
||||
stp $h0,$h1,[$mac] // write result
|
||||
|
||||
ret
|
||||
.size poly1305_emit_neon,.-poly1305_emit_neon
|
||||
#endif
|
||||
|
||||
.align 5
|
||||
.Lzeros:
|
||||
.long 0,0,0,0,0,0,0,0
|
||||
#ifndef __KERNEL__
|
||||
.LOPENSSL_armcap_P:
|
||||
#ifdef __ILP32__
|
||||
.long OPENSSL_armcap_P-.
|
||||
#else
|
||||
.quad OPENSSL_armcap_P-.
|
||||
#endif
|
||||
#endif
|
||||
.align 2
|
||||
___
|
||||
|
||||
# Echo this script's own leading comment block into the generated output:
# the "#!" line is skipped, a leading "#" on each line is rewritten to "//",
# empty lines are passed through, and copying stops at the first line that is
# neither a "#" comment nor empty.
open SELF,$0;
|
||||
while(<SELF>) {
|
||||
next if (/^#!/);
|
||||
last if (!s/^#/\/\// and !/^$/);
|
||||
print;
|
||||
}
|
||||
close SELF;
|
||||
|
||||
# Post-process the accumulated $code one line at a time and print it.
# The "or" chain applies at most one of the following rewrites per line
# (the trailing "or 1" makes a matched mnemonic end the chain even when its
# substitution changed nothing):
#   shrn  vN.2d/.4d        -> vN.2s
#   fmov  vN<anything>,xM  -> fmov dN,xM
#   dup                    -> every .2s/.4s becomes .2d
#   eor/and                -> every .<2|4|8><s|d|h> becomes .16b
#   umull/umlal            -> every .4s becomes .2s
#   umull2/umlal2          -> every .2s becomes .4s
#   st1..st4 {..}[lane]    -> every .2d/.4d becomes .s
foreach (split("\n",$code)) {
|
||||
s/\b(shrn\s+v[0-9]+)\.[24]d/$1.2s/ or
|
||||
s/\b(fmov\s+)v([0-9]+)[^,]*,\s*x([0-9]+)/$1d$2,x$3/ or
|
||||
(m/\bdup\b/ and (s/\.[24]s/.2d/g or 1)) or
|
||||
(m/\b(eor|and)/ and (s/\.[248][sdh]/.16b/g or 1)) or
|
||||
(m/\bum(ul|la)l\b/ and (s/\.4s/.2s/g or 1)) or
|
||||
(m/\bum(ul|la)l2\b/ and (s/\.2s/.4s/g or 1)) or
|
||||
(m/\bst[1-4]\s+{[^}]+}\[/ and (s/\.[24]d/.s/g or 1));
|
||||
|
||||
# Applied to every line regardless of the chain above: the first
# lane-indexed operand loses its element count (".2s[" -> ".s[").
s/\.[124]([sd])\[/.$1\[/;
|
||||
|
||||
print $_,"\n";
|
||||
}
|
||||
close STDOUT;
|
205
sys/dev/if_wg/module/crypto/zinc/poly1305/poly1305-donna32.c
Normal file
205
sys/dev/if_wg/module/crypto/zinc/poly1305/poly1305-donna32.c
Normal file
@ -0,0 +1,205 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR MIT
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*
|
||||
* This is based in part on Andrew Moon's poly1305-donna, which is in the
|
||||
* public domain.
|
||||
*/
|
||||
|
||||
/*
 * State of the 32-bit Poly1305 implementation.  The generic routines cast the
 * caller's opaque ctx pointer to this layout.
 */
struct poly1305_internal {
|
||||
/* accumulator, five limbs in radix 2^26 (only partially carried between blocks) */
u32 h[5];
|
||||
/* clamped key part r, five limbs in radix 2^26 */
u32 r[5];
|
||||
/* s[i] = 5 * r[i + 1], precomputed by poly1305_init_generic() */
u32 s[4];
|
||||
};
|
||||
|
||||
static void poly1305_init_generic(void *ctx, const u8 key[16])
|
||||
{
|
||||
struct poly1305_internal *st = (struct poly1305_internal *)ctx;
|
||||
|
||||
/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
|
||||
st->r[0] = (get_unaligned_le32(&key[0])) & 0x3ffffff;
|
||||
st->r[1] = (get_unaligned_le32(&key[3]) >> 2) & 0x3ffff03;
|
||||
st->r[2] = (get_unaligned_le32(&key[6]) >> 4) & 0x3ffc0ff;
|
||||
st->r[3] = (get_unaligned_le32(&key[9]) >> 6) & 0x3f03fff;
|
||||
st->r[4] = (get_unaligned_le32(&key[12]) >> 8) & 0x00fffff;
|
||||
|
||||
/* s = 5*r */
|
||||
st->s[0] = st->r[1] * 5;
|
||||
st->s[1] = st->r[2] * 5;
|
||||
st->s[2] = st->r[3] * 5;
|
||||
st->s[3] = st->r[4] * 5;
|
||||
|
||||
/* h = 0 */
|
||||
st->h[0] = 0;
|
||||
st->h[1] = 0;
|
||||
st->h[2] = 0;
|
||||
st->h[3] = 0;
|
||||
st->h[4] = 0;
|
||||
}
|
||||
|
||||
static void poly1305_blocks_generic(void *ctx, const u8 *input, size_t len,
|
||||
const u32 padbit)
|
||||
{
|
||||
struct poly1305_internal *st = (struct poly1305_internal *)ctx;
|
||||
const u32 hibit = padbit << 24;
|
||||
u32 r0, r1, r2, r3, r4;
|
||||
u32 s1, s2, s3, s4;
|
||||
u32 h0, h1, h2, h3, h4;
|
||||
u64 d0, d1, d2, d3, d4;
|
||||
u32 c;
|
||||
|
||||
r0 = st->r[0];
|
||||
r1 = st->r[1];
|
||||
r2 = st->r[2];
|
||||
r3 = st->r[3];
|
||||
r4 = st->r[4];
|
||||
|
||||
s1 = st->s[0];
|
||||
s2 = st->s[1];
|
||||
s3 = st->s[2];
|
||||
s4 = st->s[3];
|
||||
|
||||
h0 = st->h[0];
|
||||
h1 = st->h[1];
|
||||
h2 = st->h[2];
|
||||
h3 = st->h[3];
|
||||
h4 = st->h[4];
|
||||
|
||||
while (len >= POLY1305_BLOCK_SIZE) {
|
||||
/* h += m[i] */
|
||||
h0 += (get_unaligned_le32(&input[0])) & 0x3ffffff;
|
||||
h1 += (get_unaligned_le32(&input[3]) >> 2) & 0x3ffffff;
|
||||
h2 += (get_unaligned_le32(&input[6]) >> 4) & 0x3ffffff;
|
||||
h3 += (get_unaligned_le32(&input[9]) >> 6) & 0x3ffffff;
|
||||
h4 += (get_unaligned_le32(&input[12]) >> 8) | hibit;
|
||||
|
||||
/* h *= r */
|
||||
d0 = ((u64)h0 * r0) + ((u64)h1 * s4) +
|
||||
((u64)h2 * s3) + ((u64)h3 * s2) +
|
||||
((u64)h4 * s1);
|
||||
d1 = ((u64)h0 * r1) + ((u64)h1 * r0) +
|
||||
((u64)h2 * s4) + ((u64)h3 * s3) +
|
||||
((u64)h4 * s2);
|
||||
d2 = ((u64)h0 * r2) + ((u64)h1 * r1) +
|
||||
((u64)h2 * r0) + ((u64)h3 * s4) +
|
||||
((u64)h4 * s3);
|
||||
d3 = ((u64)h0 * r3) + ((u64)h1 * r2) +
|
||||
((u64)h2 * r1) + ((u64)h3 * r0) +
|
||||
((u64)h4 * s4);
|
||||
d4 = ((u64)h0 * r4) + ((u64)h1 * r3) +
|
||||
((u64)h2 * r2) + ((u64)h3 * r1) +
|
||||
((u64)h4 * r0);
|
||||
|
||||
/* (partial) h %= p */
|
||||
c = (u32)(d0 >> 26);
|
||||
h0 = (u32)d0 & 0x3ffffff;
|
||||
d1 += c;
|
||||
c = (u32)(d1 >> 26);
|
||||
h1 = (u32)d1 & 0x3ffffff;
|
||||
d2 += c;
|
||||
c = (u32)(d2 >> 26);
|
||||
h2 = (u32)d2 & 0x3ffffff;
|
||||
d3 += c;
|
||||
c = (u32)(d3 >> 26);
|
||||
h3 = (u32)d3 & 0x3ffffff;
|
||||
d4 += c;
|
||||
c = (u32)(d4 >> 26);
|
||||
h4 = (u32)d4 & 0x3ffffff;
|
||||
h0 += c * 5;
|
||||
c = (h0 >> 26);
|
||||
h0 = h0 & 0x3ffffff;
|
||||
h1 += c;
|
||||
|
||||
input += POLY1305_BLOCK_SIZE;
|
||||
len -= POLY1305_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
st->h[0] = h0;
|
||||
st->h[1] = h1;
|
||||
st->h[2] = h2;
|
||||
st->h[3] = h3;
|
||||
st->h[4] = h4;
|
||||
}
|
||||
|
||||
/*
 * Finalise the MAC: fully reduce the accumulator modulo 2^130 - 5, add the
 * 128-bit nonce modulo 2^128 and store the result little-endian in mac.
 * The state in ctx is only read.
 */
static void poly1305_emit_generic(void *ctx, u8 mac[16], const u32 nonce[4])
{
	struct poly1305_internal *state = (struct poly1305_internal *)ctx;
	u32 a0 = state->h[0], a1 = state->h[1], a2 = state->h[2],
	    a3 = state->h[3], a4 = state->h[4];
	u32 b0, b1, b2, b3, b4;
	u32 w0, w1, w2, w3;
	u32 carry, use_b;
	u64 sum;

	/* finish the carry chain left pending by the block function */
	carry = a1 >> 26;
	a1 &= 0x3ffffff;
	a2 += carry;
	carry = a2 >> 26;
	a2 &= 0x3ffffff;
	a3 += carry;
	carry = a3 >> 26;
	a3 &= 0x3ffffff;
	a4 += carry;
	carry = a4 >> 26;
	a4 &= 0x3ffffff;
	a0 += carry * 5;
	carry = a0 >> 26;
	a0 &= 0x3ffffff;
	a1 += carry;

	/* b = a + 5 - 2^130, i.e. a - p */
	b0 = a0 + 5;
	carry = b0 >> 26;
	b0 &= 0x3ffffff;
	b1 = a1 + carry;
	carry = b1 >> 26;
	b1 &= 0x3ffffff;
	b2 = a2 + carry;
	carry = b2 >> 26;
	b2 &= 0x3ffffff;
	b3 = a3 + carry;
	carry = b3 >> 26;
	b3 &= 0x3ffffff;
	b4 = a4 + carry - (1UL << 26);

	/*
	 * Branch-free select: use_b is all ones when b4 did not go negative
	 * (a >= p, keep b) and zero otherwise (keep a).
	 */
	use_b = (b4 >> ((sizeof(u32) * 8) - 1)) - 1;
	a0 = (a0 & ~use_b) | (b0 & use_b);
	a1 = (a1 & ~use_b) | (b1 & use_b);
	a2 = (a2 & ~use_b) | (b2 & use_b);
	a3 = (a3 & ~use_b) | (b3 & use_b);
	a4 = (a4 & ~use_b) | (b4 & use_b);

	/* repack the low 128 bits into four 32-bit words */
	w0 = (a0 | (a1 << 26)) & 0xffffffff;
	w1 = ((a1 >> 6) | (a2 << 20)) & 0xffffffff;
	w2 = ((a2 >> 12) | (a3 << 14)) & 0xffffffff;
	w3 = ((a3 >> 18) | (a4 << 8)) & 0xffffffff;

	/* add the nonce with carry, discarding the carry out of bit 127 */
	sum = (u64)w0 + nonce[0];
	w0 = (u32)sum;
	sum = (u64)w1 + nonce[1] + (sum >> 32);
	w1 = (u32)sum;
	sum = (u64)w2 + nonce[2] + (sum >> 32);
	w2 = (u32)sum;
	sum = (u64)w3 + nonce[3] + (sum >> 32);
	w3 = (u32)sum;

	put_unaligned_le32(w0, &mac[0]);
	put_unaligned_le32(w1, &mac[4]);
	put_unaligned_le32(w2, &mac[8]);
	put_unaligned_le32(w3, &mac[12]);
}
|
182
sys/dev/if_wg/module/crypto/zinc/poly1305/poly1305-donna64.c
Normal file
182
sys/dev/if_wg/module/crypto/zinc/poly1305/poly1305-donna64.c
Normal file
@ -0,0 +1,182 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR MIT
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*
|
||||
* This is based in part on Andrew Moon's poly1305-donna, which is in the
|
||||
* public domain.
|
||||
*/
|
||||
|
||||
typedef __uint128_t u128;
|
||||
|
||||
/*
 * State of the 64-bit Poly1305 implementation.  The generic routines cast the
 * caller's opaque ctx pointer to this layout.
 */
struct poly1305_internal {
|
||||
/* clamped key part r as three limbs (44, 44 and 42 bits wide) */
u64 r[3];
|
||||
/* accumulator in the same 44/44/42-bit limb layout */
u64 h[3];
|
||||
/* s[i] = 20 * r[i + 1], precomputed by poly1305_init_generic() */
u64 s[2];
|
||||
};
|
||||
|
||||
static void poly1305_init_generic(void *ctx, const u8 key[16])
|
||||
{
|
||||
struct poly1305_internal *st = (struct poly1305_internal *)ctx;
|
||||
u64 t0, t1;
|
||||
|
||||
/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
|
||||
t0 = get_unaligned_le64(&key[0]);
|
||||
t1 = get_unaligned_le64(&key[8]);
|
||||
|
||||
st->r[0] = t0 & 0xffc0fffffffULL;
|
||||
st->r[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffffULL;
|
||||
st->r[2] = ((t1 >> 24)) & 0x00ffffffc0fULL;
|
||||
|
||||
/* s = 20*r */
|
||||
st->s[0] = st->r[1] * 20;
|
||||
st->s[1] = st->r[2] * 20;
|
||||
|
||||
/* h = 0 */
|
||||
st->h[0] = 0;
|
||||
st->h[1] = 0;
|
||||
st->h[2] = 0;
|
||||
}
|
||||
|
||||
static void poly1305_blocks_generic(void *ctx, const u8 *input, size_t len,
|
||||
const u32 padbit)
|
||||
{
|
||||
struct poly1305_internal *st = (struct poly1305_internal *)ctx;
|
||||
const u64 hibit = ((u64)padbit) << 40;
|
||||
u64 r0, r1, r2;
|
||||
u64 s1, s2;
|
||||
u64 h0, h1, h2;
|
||||
u64 c;
|
||||
u128 d0, d1, d2, d;
|
||||
|
||||
r0 = st->r[0];
|
||||
r1 = st->r[1];
|
||||
r2 = st->r[2];
|
||||
|
||||
h0 = st->h[0];
|
||||
h1 = st->h[1];
|
||||
h2 = st->h[2];
|
||||
|
||||
s1 = st->s[0];
|
||||
s2 = st->s[1];
|
||||
|
||||
while (len >= POLY1305_BLOCK_SIZE) {
|
||||
u64 t0, t1;
|
||||
|
||||
/* h += m[i] */
|
||||
t0 = get_unaligned_le64(&input[0]);
|
||||
t1 = get_unaligned_le64(&input[8]);
|
||||
|
||||
h0 += t0 & 0xfffffffffffULL;
|
||||
h1 += ((t0 >> 44) | (t1 << 20)) & 0xfffffffffffULL;
|
||||
h2 += (((t1 >> 24)) & 0x3ffffffffffULL) | hibit;
|
||||
|
||||
/* h *= r */
|
||||
d0 = (u128)h0 * r0;
|
||||
d = (u128)h1 * s2;
|
||||
d0 += d;
|
||||
d = (u128)h2 * s1;
|
||||
d0 += d;
|
||||
d1 = (u128)h0 * r1;
|
||||
d = (u128)h1 * r0;
|
||||
d1 += d;
|
||||
d = (u128)h2 * s2;
|
||||
d1 += d;
|
||||
d2 = (u128)h0 * r2;
|
||||
d = (u128)h1 * r1;
|
||||
d2 += d;
|
||||
d = (u128)h2 * r0;
|
||||
d2 += d;
|
||||
|
||||
/* (partial) h %= p */
|
||||
c = (u64)(d0 >> 44);
|
||||
h0 = (u64)d0 & 0xfffffffffffULL;
|
||||
d1 += c;
|
||||
c = (u64)(d1 >> 44);
|
||||
h1 = (u64)d1 & 0xfffffffffffULL;
|
||||
d2 += c;
|
||||
c = (u64)(d2 >> 42);
|
||||
h2 = (u64)d2 & 0x3ffffffffffULL;
|
||||
h0 += c * 5;
|
||||
c = h0 >> 44;
|
||||
h0 = h0 & 0xfffffffffffULL;
|
||||
h1 += c;
|
||||
|
||||
input += POLY1305_BLOCK_SIZE;
|
||||
len -= POLY1305_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
st->h[0] = h0;
|
||||
st->h[1] = h1;
|
||||
st->h[2] = h2;
|
||||
}
|
||||
|
||||
/*
 * Finalise the MAC: fully reduce the accumulator modulo 2^130 - 5, add the
 * 128-bit nonce modulo 2^128 and store the result little-endian in mac.
 * The state in ctx is only read.
 */
static void poly1305_emit_generic(void *ctx, u8 mac[16], const u32 nonce[4])
{
	struct poly1305_internal *state = (struct poly1305_internal *)ctx;
	const u64 mask44 = 0xfffffffffffULL;
	const u64 mask42 = 0x3ffffffffffULL;
	u64 a0 = state->h[0], a1 = state->h[1], a2 = state->h[2];
	u64 b0, b1, b2;
	u64 n_lo, n_hi;
	u64 carry, sel;
	unsigned int pass;

	/* two identical carry passes fully normalise the limbs */
	for (pass = 0; pass < 2; ++pass) {
		carry = a1 >> 44;
		a1 &= mask44;
		a2 += carry;
		carry = a2 >> 42;
		a2 &= mask42;
		a0 += carry * 5;
		carry = a0 >> 44;
		a0 &= mask44;
		a1 += carry;
	}

	/* b = a + 5 - 2^130, i.e. a - p */
	b0 = a0 + 5;
	carry = b0 >> 44;
	b0 &= mask44;
	b1 = a1 + carry;
	carry = b1 >> 44;
	b1 &= mask44;
	b2 = a2 + carry - (1ULL << 42);

	/*
	 * Branch-free select: sel is all ones when b2 did not go negative
	 * (a >= p, keep b) and zero otherwise (keep a).
	 */
	sel = (b2 >> ((sizeof(u64) * 8) - 1)) - 1;
	a0 = (a0 & ~sel) | (b0 & sel);
	a1 = (a1 & ~sel) | (b1 & sel);
	a2 = (a2 & ~sel) | (b2 & sel);

	/* add the nonce limb by limb in the 44/44/42-bit layout */
	n_lo = ((u64)nonce[1] << 32) | nonce[0];
	n_hi = ((u64)nonce[3] << 32) | nonce[2];

	a0 += n_lo & mask44;
	carry = a0 >> 44;
	a0 &= mask44;
	a1 += (((n_lo >> 44) | (n_hi << 20)) & mask44) + carry;
	carry = a1 >> 44;
	a1 &= mask44;
	a2 += ((n_hi >> 24) & mask42) + carry;
	a2 &= mask42;

	/* repack the low 128 bits into two 64-bit words and store them */
	put_unaligned_le64(a0 | (a1 << 44), &mac[0]);
	put_unaligned_le64((a1 >> 20) | (a2 << 24), &mac[8]);
}
|
@ -0,0 +1,37 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR MIT
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*/
|
||||
|
||||
/*
 * External MIPS routines (declared asmlinkage) that the poly1305_*_arch()
 * wrappers below forward to.  ctx is the opaque Poly1305 state buffer.
 */
asmlinkage void poly1305_init_mips(void *ctx, const u8 key[16]);
|
||||
asmlinkage void poly1305_blocks_mips(void *ctx, const u8 *inp, const size_t len,
|
||||
const u32 padbit);
|
||||
asmlinkage void poly1305_emit_mips(void *ctx, u8 mac[16], const u32 nonce[4]);
|
||||
|
||||
/* Empty on MIPS: this glue defines no entries for the poly1305_nobs table. */
static bool *const poly1305_nobs[] __initconst = { };
|
||||
/* Init hook; intentionally a no-op in this MIPS glue. */
static void __init poly1305_fpu_init(void)
|
||||
{
|
||||
}
|
||||
|
||||
/*
 * Initialise ctx from key using the MIPS routine.
 * Always returns true.
 */
static inline bool poly1305_init_arch(void *ctx,
|
||||
const u8 key[POLY1305_KEY_SIZE])
|
||||
{
|
||||
poly1305_init_mips(ctx, key);
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Process len bytes from inp with the MIPS routine, passing padbit through
 * unchanged.  simd_context is accepted but not used here.
 * Always returns true.
 */
static inline bool poly1305_blocks_arch(void *ctx, const u8 *inp,
|
||||
size_t len, const u32 padbit,
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
poly1305_blocks_mips(ctx, inp, len, padbit);
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Produce the final MAC from ctx and nonce with the MIPS routine.
 * simd_context is accepted but not used here.
 * Always returns true.
 */
static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
|
||||
const u32 nonce[4],
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
poly1305_emit_mips(ctx, mac, nonce);
|
||||
return true;
|
||||
}
|
407
sys/dev/if_wg/module/crypto/zinc/poly1305/poly1305-mips.S
Normal file
407
sys/dev/if_wg/module/crypto/zinc/poly1305/poly1305-mips.S
Normal file
@ -0,0 +1,407 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
|
||||
/*
|
||||
* Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com> All Rights Reserved.
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*/
|
||||
|
||||
/*
 * Byte offsets within a 4-byte word for the lwl/lwr pairs that perform the
 * unaligned input loads: lwl is issued at offset MSB, lwr at offset LSB.
 * The two offsets swap with the target's byte order.
 */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
#define MSB 0
|
||||
#define LSB 3
|
||||
#else
|
||||
#define MSB 3
|
||||
#define LSB 0
|
||||
#endif
|
||||
|
||||
#define POLY1305_BLOCK_SIZE 16
|
||||
.text
|
||||
/* H0..H4: accumulator words, loaded from PTR_POLY1305_H(0..4) */
#define H0 $t0
|
||||
#define H1 $t1
|
||||
#define H2 $t2
|
||||
#define H3 $t3
|
||||
#define H4 $t4
|
||||
|
||||
/* R0..R3: key words, loaded from PTR_POLY1305_R(0..3) */
#define R0 $t5
|
||||
#define R1 $t6
|
||||
#define R2 $t7
|
||||
#define R3 $t8
|
||||
|
||||
/*
 * O0..O3 receive the four input words of each block (lwl/lwr from src).
 * NOTE(review): the use of O4 is not visible in this part of the file.
 */
#define O0 $s0
|
||||
#define O1 $s4
|
||||
#define O2 $v1
|
||||
#define O3 $t9
|
||||
#define O4 $s5
|
||||
|
||||
/* S1..S3: Sx = Rx + (Rx >> 2), computed once before the block loop */
#define S1 $s1
|
||||
#define S2 $s2
|
||||
#define S3 $s3
|
||||
|
||||
/* SC is set to the constant 1 before the loop; CA holds the carry word */
#define SC $at
|
||||
#define CA $v0
|
||||
|
||||
/* Input arguments */
|
||||
/* poly: context pointer; also the base register of the PTR_POLY1305_* operands */
#define poly $a0
|
||||
/* src: current input pointer */
#define src $a1
|
||||
/* srclen: byte count on entry, rounded down to a multiple of 16 and then turned into the end-of-input pointer */
#define srclen $a2
|
||||
/* hibit: fourth argument (padbit in the C prototype) */
#define hibit $a3
|
||||
|
||||
/* Location in the opaque buffer
|
||||
* R[0..3], CA, H[0..4]
|
||||
*/
|
||||
#define PTR_POLY1305_R(n) ( 0 + (n*4)) ## ($a0)
|
||||
#define PTR_POLY1305_CA (16 ) ## ($a0)
|
||||
#define PTR_POLY1305_H(n) (20 + (n*4)) ## ($a0)
|
||||
|
||||
/* Identical redefinition of the value already defined near the top of this file. */
#define POLY1305_BLOCK_SIZE 16
|
||||
/* Stack frame size; $s0..$s5 are saved at offsets 0..20 of it. */
#define POLY1305_STACK_SIZE 32
|
||||
|
||||
/*
 * poly1305_blocks_mips(ctx=$a0, src=$a1, srclen=$a2, hibit=$a3)
 *
 * Absorbs every complete 16-byte block of src into the accumulator kept in
 * the opaque context (R[0..3], CA, H[0..4] at the PTR_POLY1305_* offsets).
 * srclen is rounded down to a multiple of 16; a trailing partial block is
 * ignored.  hibit is added to h4 for every block.
 *
 * $at (SC) and $v0/$v1 (CA/O2) are used as scratch, hence ".set noat".
 * $s0-$s5 are spilled into a POLY1305_STACK_SIZE-byte frame and reloaded
 * before returning.
 */
.set noat
.align 4
.globl poly1305_blocks_mips
.ent poly1305_blocks_mips
poly1305_blocks_mips:
	.frame $sp, POLY1305_STACK_SIZE, $ra
	/* Clear the low four bits: srclen &= 0xFFFFFFF0. */
	ins srclen, $zero, 0, 4

	/* The frame is reserved up front; the early exit below releases it. */
	addiu $sp, -(POLY1305_STACK_SIZE)

	/* Nothing to do when there is not even one full block. */
	beqz srclen, .Lpoly1305_blocks_mips_end

	/*
	 * From here on srclen is no longer a length but the address one past
	 * the last full block: srclen = src + (srclen & 0xFFFFFFF0).
	 */
	addu srclen, src

	/* Key limbs r0..r3. */
	lw R0, PTR_POLY1305_R(0)
	lw R1, PTR_POLY1305_R(1)
	lw R2, PTR_POLY1305_R(2)
	lw R3, PTR_POLY1305_R(3)

	/* Spill the callee-saved registers used as O0/O1/O4/S1..S3. */
	sw $s0, 0($sp)
	sw $s1, 4($sp)
	sw $s2, 8($sp)
	sw $s3, 12($sp)
	sw $s4, 16($sp)
	sw $s5, 20($sp)

	/* Accumulator limbs h0..h4 and the carry left by the previous call. */
	lw CA, PTR_POLY1305_CA
	lw H0, PTR_POLY1305_H(0)
	lw H1, PTR_POLY1305_H(1)
	lw H2, PTR_POLY1305_H(2)
	lw H3, PTR_POLY1305_H(3)
	lw H4, PTR_POLY1305_H(4)

	/* Sx = Rx + (Rx >> 2), for x = 1..3. */
	srl S1, R1, 2
	srl S2, R2, 2
	srl S3, R3, 2
	addu S1, R1
	addu S2, R2
	addu S3, R3

	/* SC = 1, so that "maddu CA, SC" adds CA into the HI/LO accumulator. */
	addiu SC, $zero, 1

.Lpoly1305_loop:
	/* Unaligned load of the next 16 message bytes into O0..O3. */
	lwl O0, 0+MSB(src)
	lwl O1, 4+MSB(src)
	lwl O2, 8+MSB(src)
	lwl O3,12+MSB(src)
	lwr O0, 0+LSB(src)
	lwr O1, 4+LSB(src)
	lwr O2, 8+LSB(src)
	lwr O3,12+LSB(src)

#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	/* Message words are little-endian: byte-swap each one (wsbh + rotr 16). */
	wsbh O0
	wsbh O1
	wsbh O2
	wsbh O3
	rotr O0, 16
	rotr O1, 16
	rotr O2, 16
	rotr O3, 16
#endif

	/* O0 = h0 + inp[0] + carry of the previous iteration; CA = carry out. */
	addu H0, CA
	sltu CA, H0, CA
	addu O0, H0
	sltu H0, O0, H0
	addu CA, H0

	/* O1 = h1 + carry + inp[4]; CA = carry out. */
	addu H1, CA
	sltu CA, H1, CA
	addu O1, H1
	sltu H1, O1, H1
	addu CA, H1

	/* O2 = h2 + carry + inp[8]; CA = carry out. */
	addu H2, CA
	sltu CA, H2, CA
	addu O2, H2
	sltu H2, O2, H2
	addu CA, H2

	/* O3 = h3 + carry + inp[12]; CA = carry out. */
	addu H3, CA
	sltu CA, H3, CA
	addu O3, H3
	sltu H3, O3, H3
	addu CA, H3

	/* O4 = h4 + padbit + carry. */
	addu H4, hibit
	addu O4, H4, CA

	/* D0 = O0*R0 + O1*S3 + O2*S2 + O3*S1; low word -> H0, high word -> CA. */
	multu O0, R0
	maddu O1, S3
	maddu O2, S2
	maddu O3, S1
	mfhi CA
	mflo H0

	/* D1 = O0*R1 + O1*R0 + O2*S3 + O3*S2 + O4*S1 + CA. */
	multu O0, R1
	maddu O1, R0
	maddu O2, S3
	maddu O3, S2
	maddu O4, S1
	maddu CA, SC
	mfhi CA
	mflo H1

	/* D2 = O0*R2 + O1*R1 + O2*R0 + O3*S3 + O4*S2 + CA. */
	multu O0, R2
	maddu O1, R1
	maddu O2, R0
	maddu O3, S3
	maddu O4, S2
	maddu CA, SC
	mfhi CA
	mflo H2

	/*
	 * D4 = low 32 bits of O4*R0.
	 * NOTE(review): this mul sits after HI/LO of D2 were read and before
	 * the D3 chain starts, presumably because mul may disturb HI/LO;
	 * confirm before moving it.
	 */
	mul H4, O4, R0

	/* D3 = O0*R3 + O1*R2 + O2*R1 + O3*R0 + O4*S3 + CA. */
	multu O0, R3
	maddu O1, R2
	maddu O2, R1
	maddu O3, R0
	maddu O4, S3
	maddu CA, SC
	mfhi CA
	mflo H3

	/* Advance to the next block. */
	addiu src, POLY1305_BLOCK_SIZE

	/* O4 = D4 + high word of D3. */
	addu O4, H4, CA
	/* h4 keeps only the low two bits. */
	andi H4, O4, 3
	/* Carry for the next round: c = (O4 >> 2) + (O4 & ~3U). */
	srl CA, O4, 2
	ins O4, $zero, 0, 2

	addu CA, O4

	/* Loop until src reaches the end address computed above. */
	bne src, srclen, .Lpoly1305_loop

	/* Reload the callee-saved registers. */
	lw $s0, 0($sp)
	lw $s1, 4($sp)
	lw $s2, 8($sp)
	lw $s3, 12($sp)
	lw $s4, 16($sp)
	lw $s5, 20($sp)

	/* Write the carry and h0..h4 back to the context. */
	sw CA, PTR_POLY1305_CA
	sw H0, PTR_POLY1305_H(0)
	sw H1, PTR_POLY1305_H(1)
	sw H2, PTR_POLY1305_H(2)
	sw H3, PTR_POLY1305_H(3)
	sw H4, PTR_POLY1305_H(4)

.Lpoly1305_blocks_mips_end:
	/* Release the frame (also reached directly when srclen < 16). */
	addiu $sp, POLY1305_STACK_SIZE

	/* Return to the caller. */
	jr $ra
.end poly1305_blocks_mips
.set at
|
||||
|
||||
/* Input arguments CTX=$a0, MAC=$a1, NONCE=$a2 */
|
||||
#define MAC $a1
|
||||
#define NONCE $a2
|
||||
|
||||
#define G0 $t5
|
||||
#define G1 $t6
|
||||
#define G2 $t7
|
||||
#define G3 $t8
|
||||
#define G4 $t9
|
||||
|
||||
/*
 * poly1305_emit_mips(ctx=$a0, MAC=$a1, NONCE=$a2)
 *
 * Folds the pending carry into h, conditionally replaces h by h + 5 (the
 * reduction modulo p), adds the four nonce words modulo 2^128 and writes the
 * 16-byte result to MAC with unaligned stores.  The context is only read.
 * $at (SC) and $v0 (CA) are used as scratch, hence ".set noat".
 */
.set noat
.align 4
.globl poly1305_emit_mips
.ent poly1305_emit_mips
poly1305_emit_mips:
	/* Fetch the pending carry and h0..h4 from the context. */
	lw CA, PTR_POLY1305_CA
	lw H0, PTR_POLY1305_H(0)
	lw H1, PTR_POLY1305_H(1)
	lw H2, PTR_POLY1305_H(2)
	lw H3, PTR_POLY1305_H(3)
	lw H4, PTR_POLY1305_H(4)

	/* Ripple the pending carry through h0..h4. */
	addu H0, CA
	sltu CA, H0, CA
	addu H1, CA
	sltu CA, H1, CA
	addu H2, CA
	sltu CA, H2, CA
	addu H3, CA
	sltu CA, H3, CA
	addu H4, CA

	/* g = h + 5 (i.e. h + -p); the carry is rippled through g0..g4. */
	addiu G0, H0, 5
	sltu CA, G0, H0
	addu G1, H1, CA
	sltu CA, G1, H1
	addu G2, H2, CA
	sltu CA, G2, H2
	addu G3, H3, CA
	sltu CA, G3, H3
	addu G4, H4, CA

	/* SC is non-zero exactly when g4 has a bit set at position 2 or above. */
	srl SC, G4, 2

	/* If the addition carried into the 131st bit, take h3:h0 = g3:g0. */
	movn H0, G0, SC
	movn H1, G1, SC
	movn H2, G2, SC
	movn H3, G3, SC

	/* Unaligned load of the four nonce words; they are used as loaded. */
	lwl G0, 0+MSB(NONCE)
	lwl G1, 4+MSB(NONCE)
	lwl G2, 8+MSB(NONCE)
	lwl G3,12+MSB(NONCE)
	lwr G0, 0+LSB(NONCE)
	lwr G1, 4+LSB(NONCE)
	lwr G2, 8+LSB(NONCE)
	lwr G3,12+LSB(NONCE)

	/* mac = (h + nonce) % (2^128): word 0 first, CA = carry out. */
	addu H0, G0
	sltu CA, H0, G0

	/* Word 1: add the carry, then the nonce word; CA = combined carry. */
	addu H1, CA
	sltu CA, H1, CA
	addu H1, G1
	sltu G1, H1, G1
	addu CA, G1

	/* Word 2: same pattern. */
	addu H2, CA
	sltu CA, H2, CA
	addu H2, G2
	sltu G2, H2, G2
	addu CA, G2

	/* Word 3: the carry out of the top word is discarded. */
	addu H3, CA
	addu H3, G3

#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	/* The tag is stored little-endian: byte-swap each word first. */
	wsbh H0
	wsbh H1
	wsbh H2
	wsbh H3
	rotr H0, 16
	rotr H1, 16
	rotr H2, 16
	rotr H3, 16
#endif

	/* Unaligned store of the 16-byte tag. */
	swl H0, 0+MSB(MAC)
	swl H1, 4+MSB(MAC)
	swl H2, 8+MSB(MAC)
	swl H3,12+MSB(MAC)
	swr H0, 0+LSB(MAC)
	swr H1, 4+LSB(MAC)
	swr H2, 8+LSB(MAC)
	swr H3,12+LSB(MAC)

	jr $ra
.end poly1305_emit_mips
|
||||
|
||||
#define PR0 $t0
|
||||
#define PR1 $t1
|
||||
#define PR2 $t2
|
||||
#define PR3 $t3
|
||||
#define PT0 $t4
|
||||
|
||||
/* Input arguments CTX=$a0, KEY=$a1 */
|
||||
|
||||
/*
 * poly1305_init_mips(ctx=$a0, key=$a1)
 *
 * Resets the context: the carry and h0..h4 are zeroed, and the first 16 key
 * bytes are loaded (unaligned), masked and stored as R[0..3].
 * R0 keeps its low 28 bits; R1..R3 are ANDed with 0x0ffffffc.
 */
.align 4
.globl poly1305_init_mips
.ent poly1305_init_mips
poly1305_init_mips:
	/* Unaligned load of key bytes 0..15 into PR0..PR3. */
	lwl PR0, 0+MSB($a1)
	lwl PR1, 4+MSB($a1)
	lwl PR2, 8+MSB($a1)
	lwl PR3,12+MSB($a1)
	lwr PR0, 0+LSB($a1)
	lwr PR1, 4+LSB($a1)
	lwr PR2, 8+LSB($a1)
	lwr PR3,12+LSB($a1)

	/* Clear the carry and the accumulator h0..h4. */
	sw $zero, PTR_POLY1305_CA
	sw $zero, PTR_POLY1305_H(0)
	sw $zero, PTR_POLY1305_H(1)
	sw $zero, PTR_POLY1305_H(2)
	sw $zero, PTR_POLY1305_H(3)
	sw $zero, PTR_POLY1305_H(4)

#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	/* Key words are little-endian: byte-swap each one (wsbh + rotr 16). */
	wsbh PR0
	wsbh PR1
	wsbh PR2
	wsbh PR3
	rotr PR0, 16
	rotr PR1, 16
	rotr PR2, 16
	rotr PR3, 16
#endif

	/* PT0 = 0x0FFFFFFC, the mask for r1..r3. */
	lui PT0, 0x0FFF
	ori PT0, 0xFFFC

	/* r0 &= 0x0fffffff (keep the low 28 bits). */
	ext PR0, PR0, 0, (32-4)

	/* r1..r3 &= 0x0ffffffc. */
	and PR1, PT0
	and PR2, PT0
	and PR3, PT0

	/* Store the masked key limbs. */
	sw PR0, PTR_POLY1305_R(0)
	sw PR1, PTR_POLY1305_R(1)
	sw PR2, PTR_POLY1305_R(2)
	sw PR3, PTR_POLY1305_R(3)

	/* Return to the caller. */
	jr $ra
.end poly1305_init_mips
|
467
sys/dev/if_wg/module/crypto/zinc/poly1305/poly1305-mips64.pl
Executable file
467
sys/dev/if_wg/module/crypto/zinc/poly1305/poly1305-mips64.pl
Executable file
@ -0,0 +1,467 @@
|
||||
#!/usr/bin/env perl
|
||||
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
|
||||
#
|
||||
# This code is taken from the OpenSSL project but the author, Andy Polyakov,
|
||||
# has relicensed it under the licenses specified in the SPDX header above.
|
||||
# The original headers, including the original license headers, are
|
||||
# included below for completeness.
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||
# ====================================================================
|
||||
#
|
||||
# Poly1305 hash for MIPS64.
|
||||
#
|
||||
# May 2016
|
||||
#
|
||||
# Numbers are cycles per processed byte with poly1305_blocks alone.
|
||||
#
|
||||
# IALU/gcc
|
||||
# R1x000 5.64/+120% (big-endian)
|
||||
# Octeon II 3.80/+280% (little-endian)
|
||||
|
||||
######################################################################
|
||||
# There is a number of MIPS ABI in use, O32 and N32/64 are most
|
||||
# widely used. Then there is a new contender: NUBI. It appears that if
|
||||
# one picks the latter, it's possible to arrange code in ABI neutral
|
||||
# manner. Therefore let's stick to NUBI register layout:
|
||||
#
|
||||
($zero, $at, $t0, $t1, $t2) = map('$'.$_, (0..2, 24, 25));
# NOTE: the statement above must remain the first non-comment, non-blank line
# of this script.  The header-copy loop at the end of the file stops at that
# line, so a comment inserted before it would leak into the generated output.
#
# Each variable holds the textual register operand ("$" followed by the
# register number) under the NUBI layout described in the header.
($a0, $a1, $a2, $a3, $a4, $a5, $a6, $a7) = map('$'.$_, (4..11));
($s0, $s1, $s2, $s3, $s4, $s5, $s6, $s7, $s8, $s9, $s10, $s11) =
	map('$'.$_, (12..23));
($gp, $tp, $sp, $fp, $ra) = map('$'.$_, (3, 28..31));
|
||||
#
|
||||
# The return value is placed in $a0. Following coding rules facilitate
|
||||
# interoperability:
|
||||
#
|
||||
# - never ever touch $tp, "thread pointer", former $gp [o32 can be
|
||||
# excluded from the rule, because it's specified volatile];
|
||||
# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
|
||||
# old code];
|
||||
# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
|
||||
#
|
||||
# For reference here is register layout for N32/64 MIPS ABIs:
|
||||
#
|
||||
# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
|
||||
# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
|
||||
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
|
||||
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
|
||||
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
|
||||
#
|
||||
# <appro@openssl.org>
|
||||
#
|
||||
######################################################################
|
||||
|
||||
# ABI flavour: first command-line argument, "64" when none is given.
# Only names matching /64|n32/ are accepted below, so "o32" from the
# historical list in the trailing comment is rejected by this script.
$flavour = shift || "64";	# supported flavours are o32,n32,64,nubi32,nubi64

die "MIPS64 only" if ($flavour !~ /64|n32/i);

# NUBI returns values in $a0 and saves $s0-$s5; the other flavours return in
# $t0 and save only two registers (see the .mask directive in the template).
if ($flavour =~ /nubi/i) {
	$v0 = $a0;
	$SAVED_REGS_MASK = "0x0003f000";
} else {
	$v0 = $t0;
	$SAVED_REGS_MASK = "0x00030000";
}

# Argument registers of poly1305_init/poly1305_blocks, then scratch registers.
($ctx, $inp, $len, $padbit) = ($a0, $a1, $a2, $a3);
($in0, $in1) = ($a4, $a5);
($tmp0, $tmp1, $tmp2, $tmp3, $tmp4) = ($a6, $a7, $at, $t0, $t1);
|
||||
|
||||
$code.=<<___;
|
||||
#if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\
|
||||
defined(_MIPS_ARCH_MIPS64R6)) \\
|
||||
&& !defined(_MIPS_ARCH_MIPS64R2)
|
||||
# define _MIPS_ARCH_MIPS64R2
|
||||
#endif
|
||||
|
||||
#if defined(_MIPS_ARCH_MIPS64R6)
|
||||
# define dmultu(rs,rt)
|
||||
# define mflo(rd,rs,rt) dmulu rd,rs,rt
|
||||
# define mfhi(rd,rs,rt) dmuhu rd,rs,rt
|
||||
#else
|
||||
# define dmultu(rs,rt) dmultu rs,rt
|
||||
# define mflo(rd,rs,rt) mflo rd
|
||||
# define mfhi(rd,rs,rt) mfhi rd
|
||||
#endif
|
||||
|
||||
#ifdef __KERNEL__
|
||||
# define poly1305_init poly1305_init_mips
|
||||
# define poly1305_blocks poly1305_blocks_mips
|
||||
# define poly1305_emit poly1305_emit_mips
|
||||
#endif
|
||||
|
||||
#if defined(__MIPSEB__) && !defined(MIPSEB)
|
||||
# define MIPSEB
|
||||
#endif
|
||||
|
||||
#ifdef MIPSEB
|
||||
# define MSB 0
|
||||
# define LSB 7
|
||||
#else
|
||||
# define MSB 7
|
||||
# define LSB 0
|
||||
#endif
|
||||
|
||||
.text
|
||||
.set noat
|
||||
.set noreorder
|
||||
|
||||
.align 5
|
||||
.globl poly1305_init
|
||||
.ent poly1305_init
|
||||
poly1305_init:
|
||||
.frame $sp,0,$ra
|
||||
.set reorder
|
||||
|
||||
sd $zero,0($ctx)
|
||||
sd $zero,8($ctx)
|
||||
sd $zero,16($ctx)
|
||||
|
||||
beqz $inp,.Lno_key
|
||||
|
||||
#if defined(_MIPS_ARCH_MIPS64R6)
|
||||
ld $in0,0($inp)
|
||||
ld $in1,8($inp)
|
||||
#else
|
||||
ldl $in0,0+MSB($inp)
|
||||
ldl $in1,8+MSB($inp)
|
||||
ldr $in0,0+LSB($inp)
|
||||
ldr $in1,8+LSB($inp)
|
||||
#endif
|
||||
#ifdef MIPSEB
|
||||
# if defined(_MIPS_ARCH_MIPS64R2)
|
||||
dsbh $in0,$in0 # byte swap
|
||||
dsbh $in1,$in1
|
||||
dshd $in0,$in0
|
||||
dshd $in1,$in1
|
||||
# else
|
||||
ori $tmp0,$zero,0xFF
|
||||
dsll $tmp2,$tmp0,32
|
||||
or $tmp0,$tmp2 # 0x000000FF000000FF
|
||||
|
||||
and $tmp1,$in0,$tmp0 # byte swap
|
||||
and $tmp3,$in1,$tmp0
|
||||
dsrl $tmp2,$in0,24
|
||||
dsrl $tmp4,$in1,24
|
||||
dsll $tmp1,24
|
||||
dsll $tmp3,24
|
||||
and $tmp2,$tmp0
|
||||
and $tmp4,$tmp0
|
||||
dsll $tmp0,8 # 0x0000FF000000FF00
|
||||
or $tmp1,$tmp2
|
||||
or $tmp3,$tmp4
|
||||
and $tmp2,$in0,$tmp0
|
||||
and $tmp4,$in1,$tmp0
|
||||
dsrl $in0,8
|
||||
dsrl $in1,8
|
||||
dsll $tmp2,8
|
||||
dsll $tmp4,8
|
||||
and $in0,$tmp0
|
||||
and $in1,$tmp0
|
||||
or $tmp1,$tmp2
|
||||
or $tmp3,$tmp4
|
||||
or $in0,$tmp1
|
||||
or $in1,$tmp3
|
||||
dsrl $tmp1,$in0,32
|
||||
dsrl $tmp3,$in1,32
|
||||
dsll $in0,32
|
||||
dsll $in1,32
|
||||
or $in0,$tmp1
|
||||
or $in1,$tmp3
|
||||
# endif
|
||||
#endif
|
||||
li $tmp0,1
|
||||
dsll $tmp0,32
|
||||
daddiu $tmp0,-63
|
||||
dsll $tmp0,28
|
||||
daddiu $tmp0,-1 # 0ffffffc0fffffff
|
||||
|
||||
and $in0,$tmp0
|
||||
daddiu $tmp0,-3 # 0ffffffc0ffffffc
|
||||
and $in1,$tmp0
|
||||
|
||||
sd $in0,24($ctx)
|
||||
dsrl $tmp0,$in1,2
|
||||
sd $in1,32($ctx)
|
||||
daddu $tmp0,$in1 # s1 = r1 + (r1 >> 2)
|
||||
sd $tmp0,40($ctx)
|
||||
|
||||
.Lno_key:
|
||||
li $v0,0 # return 0
|
||||
jr $ra
|
||||
.end poly1305_init
|
||||
___
|
||||
{
|
||||
my ($h0,$h1,$h2,$r0,$r1,$s1,$d0,$d1,$d2) =
|
||||
($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2);
|
||||
|
||||
$code.=<<___;
|
||||
.align 5
|
||||
.globl poly1305_blocks
|
||||
.ent poly1305_blocks
|
||||
poly1305_blocks:
|
||||
.set noreorder
|
||||
dsrl $len,4 # number of complete blocks
|
||||
bnez $len,poly1305_blocks_internal
|
||||
nop
|
||||
jr $ra
|
||||
nop
|
||||
.end poly1305_blocks
|
||||
|
||||
.align 5
|
||||
.ent poly1305_blocks_internal
|
||||
poly1305_blocks_internal:
|
||||
.frame $sp,6*8,$ra
|
||||
.mask $SAVED_REGS_MASK,-8
|
||||
.set noreorder
|
||||
dsubu $sp,6*8
|
||||
sd $s5,40($sp)
|
||||
sd $s4,32($sp)
|
||||
___
|
||||
$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
|
||||
sd $s3,24($sp)
|
||||
sd $s2,16($sp)
|
||||
sd $s1,8($sp)
|
||||
sd $s0,0($sp)
|
||||
___
|
||||
$code.=<<___;
|
||||
.set reorder
|
||||
|
||||
ld $h0,0($ctx) # load hash value
|
||||
ld $h1,8($ctx)
|
||||
ld $h2,16($ctx)
|
||||
|
||||
ld $r0,24($ctx) # load key
|
||||
ld $r1,32($ctx)
|
||||
ld $s1,40($ctx)
|
||||
|
||||
.Loop:
|
||||
#if defined(_MIPS_ARCH_MIPS64R6)
|
||||
ld $in0,0($inp) # load input
|
||||
ld $in1,8($inp)
|
||||
#else
|
||||
ldl $in0,0+MSB($inp) # load input
|
||||
ldl $in1,8+MSB($inp)
|
||||
ldr $in0,0+LSB($inp)
|
||||
ldr $in1,8+LSB($inp)
|
||||
#endif
|
||||
daddiu $len,-1
|
||||
daddiu $inp,16
|
||||
#ifdef MIPSEB
|
||||
# if defined(_MIPS_ARCH_MIPS64R2)
|
||||
dsbh $in0,$in0 # byte swap
|
||||
dsbh $in1,$in1
|
||||
dshd $in0,$in0
|
||||
dshd $in1,$in1
|
||||
# else
|
||||
ori $tmp0,$zero,0xFF
|
||||
dsll $tmp2,$tmp0,32
|
||||
or $tmp0,$tmp2 # 0x000000FF000000FF
|
||||
|
||||
and $tmp1,$in0,$tmp0 # byte swap
|
||||
and $tmp3,$in1,$tmp0
|
||||
dsrl $tmp2,$in0,24
|
||||
dsrl $tmp4,$in1,24
|
||||
dsll $tmp1,24
|
||||
dsll $tmp3,24
|
||||
and $tmp2,$tmp0
|
||||
and $tmp4,$tmp0
|
||||
dsll $tmp0,8 # 0x0000FF000000FF00
|
||||
or $tmp1,$tmp2
|
||||
or $tmp3,$tmp4
|
||||
and $tmp2,$in0,$tmp0
|
||||
and $tmp4,$in1,$tmp0
|
||||
dsrl $in0,8
|
||||
dsrl $in1,8
|
||||
dsll $tmp2,8
|
||||
dsll $tmp4,8
|
||||
and $in0,$tmp0
|
||||
and $in1,$tmp0
|
||||
or $tmp1,$tmp2
|
||||
or $tmp3,$tmp4
|
||||
or $in0,$tmp1
|
||||
or $in1,$tmp3
|
||||
dsrl $tmp1,$in0,32
|
||||
dsrl $tmp3,$in1,32
|
||||
dsll $in0,32
|
||||
dsll $in1,32
|
||||
or $in0,$tmp1
|
||||
or $in1,$tmp3
|
||||
# endif
|
||||
#endif
|
||||
daddu $h0,$in0 # accumulate input
|
||||
daddu $h1,$in1
|
||||
sltu $tmp0,$h0,$in0
|
||||
sltu $tmp1,$h1,$in1
|
||||
daddu $h1,$tmp0
|
||||
|
||||
dmultu ($r0,$h0) # h0*r0
|
||||
daddu $h2,$padbit
|
||||
sltu $tmp0,$h1,$tmp0
|
||||
mflo ($d0,$r0,$h0)
|
||||
mfhi ($d1,$r0,$h0)
|
||||
|
||||
dmultu ($s1,$h1) # h1*5*r1
|
||||
daddu $tmp0,$tmp1
|
||||
daddu $h2,$tmp0
|
||||
mflo ($tmp0,$s1,$h1)
|
||||
mfhi ($tmp1,$s1,$h1)
|
||||
|
||||
dmultu ($r1,$h0) # h0*r1
|
||||
daddu $d0,$tmp0
|
||||
daddu $d1,$tmp1
|
||||
mflo ($tmp2,$r1,$h0)
|
||||
mfhi ($d2,$r1,$h0)
|
||||
sltu $tmp0,$d0,$tmp0
|
||||
daddu $d1,$tmp0
|
||||
|
||||
dmultu ($r0,$h1) # h1*r0
|
||||
daddu $d1,$tmp2
|
||||
sltu $tmp2,$d1,$tmp2
|
||||
mflo ($tmp0,$r0,$h1)
|
||||
mfhi ($tmp1,$r0,$h1)
|
||||
daddu $d2,$tmp2
|
||||
|
||||
dmultu ($s1,$h2) # h2*5*r1
|
||||
daddu $d1,$tmp0
|
||||
daddu $d2,$tmp1
|
||||
mflo ($tmp2,$s1,$h2)
|
||||
|
||||
dmultu ($r0,$h2) # h2*r0
|
||||
sltu $tmp0,$d1,$tmp0
|
||||
daddu $d2,$tmp0
|
||||
mflo ($tmp3,$r0,$h2)
|
||||
|
||||
daddu $d1,$tmp2
|
||||
daddu $d2,$tmp3
|
||||
sltu $tmp2,$d1,$tmp2
|
||||
daddu $d2,$tmp2
|
||||
|
||||
li $tmp0,-4 # final reduction
|
||||
and $tmp0,$d2
|
||||
dsrl $tmp1,$d2,2
|
||||
andi $h2,$d2,3
|
||||
daddu $tmp0,$tmp1
|
||||
daddu $h0,$d0,$tmp0
|
||||
sltu $tmp0,$h0,$tmp0
|
||||
daddu $h1,$d1,$tmp0
|
||||
sltu $tmp0,$h1,$tmp0
|
||||
daddu $h2,$h2,$tmp0
|
||||
|
||||
bnez $len,.Loop
|
||||
|
||||
sd $h0,0($ctx) # store hash value
|
||||
sd $h1,8($ctx)
|
||||
sd $h2,16($ctx)
|
||||
|
||||
.set noreorder
|
||||
ld $s5,40($sp) # epilogue
|
||||
ld $s4,32($sp)
|
||||
___
|
||||
$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue
|
||||
ld $s3,24($sp)
|
||||
ld $s2,16($sp)
|
||||
ld $s1,8($sp)
|
||||
ld $s0,0($sp)
|
||||
___
|
||||
$code.=<<___;
|
||||
jr $ra
|
||||
daddu $sp,6*8
|
||||
.end poly1305_blocks_internal
|
||||
___
|
||||
}
|
||||
{
|
||||
my ($ctx,$mac,$nonce) = ($a0,$a1,$a2);
|
||||
|
||||
$code.=<<___;
|
||||
.align 5
|
||||
.globl poly1305_emit
|
||||
.ent poly1305_emit
|
||||
poly1305_emit:
|
||||
.frame $sp,0,$ra
|
||||
.set reorder
|
||||
|
||||
ld $tmp0,0($ctx)
|
||||
ld $tmp1,8($ctx)
|
||||
ld $tmp2,16($ctx)
|
||||
|
||||
daddiu $in0,$tmp0,5 # compare to modulus
|
||||
sltiu $tmp3,$in0,5
|
||||
daddu $in1,$tmp1,$tmp3
|
||||
sltu $tmp3,$in1,$tmp3
|
||||
daddu $tmp2,$tmp2,$tmp3
|
||||
|
||||
dsrl $tmp2,2 # see if it carried/borrowed
|
||||
dsubu $tmp2,$zero,$tmp2
|
||||
nor $tmp3,$zero,$tmp2
|
||||
|
||||
and $in0,$tmp2
|
||||
and $tmp0,$tmp3
|
||||
and $in1,$tmp2
|
||||
and $tmp1,$tmp3
|
||||
or $in0,$tmp0
|
||||
or $in1,$tmp1
|
||||
|
||||
lwu $tmp0,0($nonce) # load nonce
|
||||
lwu $tmp1,4($nonce)
|
||||
lwu $tmp2,8($nonce)
|
||||
lwu $tmp3,12($nonce)
|
||||
dsll $tmp1,32
|
||||
dsll $tmp3,32
|
||||
or $tmp0,$tmp1
|
||||
or $tmp2,$tmp3
|
||||
|
||||
daddu $in0,$tmp0 # accumulate nonce
|
||||
daddu $in1,$tmp2
|
||||
sltu $tmp0,$in0,$tmp0
|
||||
daddu $in1,$tmp0
|
||||
|
||||
dsrl $tmp0,$in0,8 # write mac value
|
||||
dsrl $tmp1,$in0,16
|
||||
dsrl $tmp2,$in0,24
|
||||
sb $in0,0($mac)
|
||||
dsrl $tmp3,$in0,32
|
||||
sb $tmp0,1($mac)
|
||||
dsrl $tmp0,$in0,40
|
||||
sb $tmp1,2($mac)
|
||||
dsrl $tmp1,$in0,48
|
||||
sb $tmp2,3($mac)
|
||||
dsrl $tmp2,$in0,56
|
||||
sb $tmp3,4($mac)
|
||||
dsrl $tmp3,$in1,8
|
||||
sb $tmp0,5($mac)
|
||||
dsrl $tmp0,$in1,16
|
||||
sb $tmp1,6($mac)
|
||||
dsrl $tmp1,$in1,24
|
||||
sb $tmp2,7($mac)
|
||||
|
||||
sb $in1,8($mac)
|
||||
dsrl $tmp2,$in1,32
|
||||
sb $tmp3,9($mac)
|
||||
dsrl $tmp3,$in1,40
|
||||
sb $tmp0,10($mac)
|
||||
dsrl $tmp0,$in1,48
|
||||
sb $tmp1,11($mac)
|
||||
dsrl $tmp1,$in1,56
|
||||
sb $tmp2,12($mac)
|
||||
sb $tmp3,13($mac)
|
||||
sb $tmp0,14($mac)
|
||||
sb $tmp1,15($mac)
|
||||
|
||||
jr $ra
|
||||
.end poly1305_emit
|
||||
.rdata
|
||||
.align 2
|
||||
___
|
||||
}
|
||||
|
||||
# Select the destination first.  The last remaining command-line argument, if
# any, names the output file.  Redirecting STDOUT before anything is printed
# makes the licence header below land in that file together with the code;
# previously the header was printed before the redirect and therefore went to
# the caller's original STDOUT instead.
$output = pop;
if ($output) {
	open(STDOUT, ">", $output) or die "can't open $output: $!";
}

# Copy this script's leading comment block into the output, turning the "#"
# comment marker into "//".  The "#!" line is skipped and copying stops at the
# first line that is neither a comment nor empty.
open SELF,$0;
while(<SELF>) {
	next if (/^#!/);
	last if (!s/^#/\/\// and !/^$/);
	print;
}
close SELF;

# Emit the accumulated assembly text; a failed flush/close is reported rather
# than silently leaving a truncated file behind.
print $code;
close STDOUT or die "error closing STDOUT: $!";
|
||||
|
171
sys/dev/if_wg/module/crypto/zinc/poly1305/poly1305-x86_64-glue.c
Normal file
171
sys/dev/if_wg/module/crypto/zinc/poly1305/poly1305-x86_64-glue.c
Normal file
@ -0,0 +1,171 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR MIT
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#ifdef __linux__
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/intel-family.h>
|
||||
#else
|
||||
#include <sys/simd-x86_64.h>
|
||||
#endif
|
||||
|
||||
asmlinkage void poly1305_init_x86_64(void *ctx,
|
||||
const u8 key[POLY1305_KEY_SIZE]);
|
||||
asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp,
|
||||
const size_t len, const u32 padbit);
|
||||
asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_MAC_SIZE],
|
||||
const u32 nonce[4]);
|
||||
asmlinkage void poly1305_emit_avx(void *ctx, u8 mac[POLY1305_MAC_SIZE],
|
||||
const u32 nonce[4]);
|
||||
asmlinkage void poly1305_blocks_avx(void *ctx, const u8 *inp, const size_t len,
|
||||
const u32 padbit);
|
||||
asmlinkage void poly1305_blocks_avx2(void *ctx, const u8 *inp, const size_t len,
|
||||
const u32 padbit);
|
||||
asmlinkage void poly1305_blocks_avx512(void *ctx, const u8 *inp,
|
||||
const size_t len, const u32 padbit);
|
||||
|
||||
static bool poly1305_use_avx __ro_after_init;
|
||||
static bool poly1305_use_avx2 __ro_after_init;
|
||||
static bool poly1305_use_avx512 __ro_after_init;
|
||||
static bool *const poly1305_nobs[] __initconst = {
|
||||
&poly1305_use_avx, &poly1305_use_avx2, &poly1305_use_avx512 };
|
||||
|
||||
/*
 * Decide once which SIMD implementations may be used and record the result
 * in poly1305_use_avx / poly1305_use_avx2 / poly1305_use_avx512.
 *
 * Linux: each level requires the CPU feature bits of all lower levels plus
 * the matching xfeatures; AVX-512 is additionally refused on Skylake-X and is
 * not probed at all when COMPAT_CANNOT_USE_AVX512 is defined.
 *
 * Elsewhere: the CPUID feature words are tested directly and each level
 * builds on the flag computed for the level below.  This branch has no
 * Skylake-X exclusion.
 */
static void __init poly1305_fpu_init(void)
{
#ifdef __linux__
	poly1305_use_avx =
		boot_cpu_has(X86_FEATURE_AVX) &&
		cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
	poly1305_use_avx2 =
		boot_cpu_has(X86_FEATURE_AVX) &&
		boot_cpu_has(X86_FEATURE_AVX2) &&
		cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
#ifndef COMPAT_CANNOT_USE_AVX512
	poly1305_use_avx512 =
		boot_cpu_has(X86_FEATURE_AVX) &&
		boot_cpu_has(X86_FEATURE_AVX2) &&
		boot_cpu_has(X86_FEATURE_AVX512F) &&
		cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
				  XFEATURE_MASK_AVX512, NULL) &&
		/* Skylake downclocks unacceptably much when using zmm. */
		boot_cpu_data.x86_model != INTEL_FAM6_SKYLAKE_X;
#endif
#else
	/* AVX needs the CPUID bit and enabled ymm state. */
	poly1305_use_avx = (cpu_feature2 & CPUID2_AVX) != 0 &&
			   __ymm_enabled();
	/* AVX2 additionally needs its own CPUID bit. */
	poly1305_use_avx2 = poly1305_use_avx &&
			    (cpu_stdext_feature & CPUID_STDEXT_AVX2) != 0;
	/* AVX-512 additionally needs AVX512F and enabled zmm state. */
	poly1305_use_avx512 = poly1305_use_avx2 &&
			      (cpu_stdext_feature & CPUID_STDEXT_AVX512F) != 0 &&
			      __zmm_enabled();
#endif
}
|
||||
|
||||
static inline bool poly1305_init_arch(void *ctx,
|
||||
const u8 key[POLY1305_KEY_SIZE])
|
||||
{
|
||||
poly1305_init_x86_64(ctx, key);
|
||||
return true;
|
||||
}
|
||||
|
||||
struct poly1305_arch_internal {
|
||||
union {
|
||||
struct {
|
||||
u32 h[5];
|
||||
u32 is_base2_26;
|
||||
};
|
||||
u64 hs[3];
|
||||
};
|
||||
u64 r[2];
|
||||
u64 pad;
|
||||
struct { u32 r2, r1, r4, r3; } rn[9];
|
||||
};
|
||||
|
||||
/* The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit
|
||||
* the unfortunate situation of using AVX and then having to go back to scalar
|
||||
* -- because the user is silly and has called the update function from two
|
||||
* separate contexts -- then we need to convert back to the original base before
|
||||
* proceeding. It is possible to reason that the initial reduction below is
|
||||
* sufficient given the implementation invariants. However, for an avoidance of
|
||||
* doubt and because this is not performance critical, we do the full reduction
|
||||
* anyway.
|
||||
*/
|
||||
static void convert_to_base2_64(void *ctx)
|
||||
{
|
||||
struct poly1305_arch_internal *state = ctx;
|
||||
u32 cy;
|
||||
|
||||
if (!state->is_base2_26)
|
||||
return;
|
||||
|
||||
cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy;
|
||||
cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy;
|
||||
cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy;
|
||||
cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy;
|
||||
state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0];
|
||||
state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12);
|
||||
state->hs[2] = state->h[4] >> 24;
|
||||
#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
|
||||
cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL);
|
||||
state->hs[2] &= 3;
|
||||
state->hs[0] += cy;
|
||||
state->hs[1] += (cy = ULT(state->hs[0], cy));
|
||||
state->hs[2] += ULT(state->hs[1], cy);
|
||||
#undef ULT
|
||||
state->is_base2_26 = 0;
|
||||
}
|
||||
|
||||
static inline bool poly1305_blocks_arch(void *ctx, const u8 *inp,
|
||||
size_t len, const u32 padbit,
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
struct poly1305_arch_internal *state = ctx;
|
||||
|
||||
/* SIMD disables preemption, so relax after processing each page. */
|
||||
BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE ||
|
||||
PAGE_SIZE % POLY1305_BLOCK_SIZE);
|
||||
|
||||
if (!poly1305_use_avx ||
|
||||
(len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) ||
|
||||
!simd_use(simd_context)) {
|
||||
convert_to_base2_64(ctx);
|
||||
poly1305_blocks_x86_64(ctx, inp, len, padbit);
|
||||
return true;
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
const size_t bytes = min_t(size_t, len, PAGE_SIZE);
|
||||
|
||||
if (poly1305_use_avx512)
|
||||
poly1305_blocks_avx512(ctx, inp, bytes, padbit);
|
||||
else if (poly1305_use_avx2)
|
||||
poly1305_blocks_avx2(ctx, inp, bytes, padbit);
|
||||
else
|
||||
poly1305_blocks_avx(ctx, inp, bytes, padbit);
|
||||
len -= bytes;
|
||||
if (!len)
|
||||
break;
|
||||
inp += bytes;
|
||||
simd_relax(simd_context);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
|
||||
const u32 nonce[4],
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
struct poly1305_arch_internal *state = ctx;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_AS_AVX) || !poly1305_use_avx ||
|
||||
!state->is_base2_26 || !simd_use(simd_context)) {
|
||||
convert_to_base2_64(ctx);
|
||||
poly1305_emit_x86_64(ctx, mac, nonce);
|
||||
} else
|
||||
poly1305_emit_avx(ctx, mac, nonce);
|
||||
return true;
|
||||
}
|
4266
sys/dev/if_wg/module/crypto/zinc/poly1305/poly1305-x86_64.pl
Executable file
4266
sys/dev/if_wg/module/crypto/zinc/poly1305/poly1305-x86_64.pl
Executable file
File diff suppressed because it is too large
Load Diff
163
sys/dev/if_wg/module/crypto/zinc/poly1305/poly1305.c
Normal file
163
sys/dev/if_wg/module/crypto/zinc/poly1305/poly1305.c
Normal file
@ -0,0 +1,163 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR MIT
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*
|
||||
* Implementation of the Poly1305 message authenticator.
|
||||
*
|
||||
* Information: https://cr.yp.to/mac.html
|
||||
*/
|
||||
|
||||
#include <sys/support.h>
|
||||
#include <zinc/poly1305.h>
|
||||
#include "../selftest/run.h"
|
||||
|
||||
#if defined(CONFIG_ZINC_ARCH_X86_64)
|
||||
#include "poly1305-x86_64-glue.c"
|
||||
#elif defined(CONFIG_ZINC_ARCH_ARM) || defined(CONFIG_ZINC_ARCH_ARM64)
|
||||
#include "poly1305-arm-glue.c"
|
||||
#elif defined(CONFIG_ZINC_ARCH_MIPS) || defined(CONFIG_ZINC_ARCH_MIPS64)
|
||||
#include "poly1305-mips-glue.c"
|
||||
#else
|
||||
static inline bool poly1305_init_arch(void *ctx,
|
||||
const u8 key[POLY1305_KEY_SIZE])
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline bool poly1305_blocks_arch(void *ctx, const u8 *input,
|
||||
size_t len, const u32 padbit,
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
|
||||
const u32 nonce[4],
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static bool *const poly1305_nobs[] __initconst = { };
|
||||
/*
 * Fallback used when no architecture-specific glue is compiled in:
 * there are no SIMD capabilities to probe, so this does nothing.
 */
static void __init poly1305_fpu_init(void)
{
}
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
|
||||
#include "poly1305-donna64.c"
|
||||
#else
|
||||
#include "poly1305-donna32.c"
|
||||
#endif
|
||||
|
||||
void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE])
|
||||
{
|
||||
ctx->nonce[0] = get_unaligned_le32(&key[16]);
|
||||
ctx->nonce[1] = get_unaligned_le32(&key[20]);
|
||||
ctx->nonce[2] = get_unaligned_le32(&key[24]);
|
||||
ctx->nonce[3] = get_unaligned_le32(&key[28]);
|
||||
|
||||
if (!poly1305_init_arch(ctx->opaque, key))
|
||||
poly1305_init_generic(ctx->opaque, key);
|
||||
|
||||
ctx->num = 0;
|
||||
}
|
||||
EXPORT_SYMBOL(poly1305_init);
|
||||
|
||||
static inline void poly1305_blocks(void *ctx, const u8 *input, const size_t len,
|
||||
const u32 padbit,
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
if (!poly1305_blocks_arch(ctx, input, len, padbit, simd_context))
|
||||
poly1305_blocks_generic(ctx, input, len, padbit);
|
||||
}
|
||||
|
||||
static inline void poly1305_emit(void *ctx, u8 mac[POLY1305_KEY_SIZE],
|
||||
const u32 nonce[4],
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
if (!poly1305_emit_arch(ctx, mac, nonce, simd_context))
|
||||
poly1305_emit_generic(ctx, mac, nonce);
|
||||
}
|
||||
|
||||
void poly1305_update(struct poly1305_ctx *ctx, const u8 *input, size_t len,
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
const size_t num = ctx->num;
|
||||
size_t rem;
|
||||
|
||||
if (num) {
|
||||
rem = POLY1305_BLOCK_SIZE - num;
|
||||
if (len < rem) {
|
||||
memcpy(ctx->data + num, input, len);
|
||||
ctx->num = num + len;
|
||||
return;
|
||||
}
|
||||
memcpy(ctx->data + num, input, rem);
|
||||
poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 1,
|
||||
simd_context);
|
||||
input += rem;
|
||||
len -= rem;
|
||||
}
|
||||
|
||||
rem = len % POLY1305_BLOCK_SIZE;
|
||||
len -= rem;
|
||||
|
||||
if (len >= POLY1305_BLOCK_SIZE) {
|
||||
poly1305_blocks(ctx->opaque, input, len, 1, simd_context);
|
||||
input += len;
|
||||
}
|
||||
|
||||
if (rem)
|
||||
memcpy(ctx->data, input, rem);
|
||||
|
||||
ctx->num = rem;
|
||||
}
|
||||
EXPORT_SYMBOL(poly1305_update);
|
||||
|
||||
void poly1305_final(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE],
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
size_t num = ctx->num;
|
||||
|
||||
if (num) {
|
||||
ctx->data[num++] = 1;
|
||||
while (num < POLY1305_BLOCK_SIZE)
|
||||
ctx->data[num++] = 0;
|
||||
poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 0,
|
||||
simd_context);
|
||||
}
|
||||
|
||||
poly1305_emit(ctx->opaque, mac, ctx->nonce, simd_context);
|
||||
|
||||
memzero_explicit(ctx, sizeof(*ctx));
|
||||
}
|
||||
EXPORT_SYMBOL(poly1305_final);
|
||||
|
||||
#include "../selftest/poly1305.c"
|
||||
|
||||
static bool nosimd __initdata = false;
|
||||
|
||||
#ifndef COMPAT_ZINC_IS_A_MODULE
|
||||
int __init poly1305_mod_init(void)
|
||||
#else
|
||||
static int __init mod_init(void)
|
||||
#endif
|
||||
{
|
||||
if (!nosimd)
|
||||
poly1305_fpu_init();
|
||||
if (!selftest_run("poly1305", poly1305_selftest, poly1305_nobs,
|
||||
ARRAY_SIZE(poly1305_nobs)))
|
||||
return -ENOTRECOVERABLE;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Stand-alone module glue, compiled only when COMPAT_ZINC_IS_A_MODULE is
 * defined: an empty exit handler, the "nosimd" module parameter, the
 * init/exit registrations and the module metadata.
 */
#ifdef COMPAT_ZINC_IS_A_MODULE
|
||||
/* Nothing to tear down on unload. */
static void __exit mod_exit(void)
|
||||
{
|
||||
}
|
||||
|
||||
module_param(nosimd, bool, 0);
|
||||
module_init(mod_init);
|
||||
module_exit(mod_exit);
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_DESCRIPTION("Poly1305 one-time authenticator");
|
||||
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
|
||||
#endif
|
2090
sys/dev/if_wg/module/crypto/zinc/selftest/blake2s.c
Normal file
2090
sys/dev/if_wg/module/crypto/zinc/selftest/blake2s.c
Normal file
File diff suppressed because it is too large
Load Diff
2703
sys/dev/if_wg/module/crypto/zinc/selftest/chacha20.c
Normal file
2703
sys/dev/if_wg/module/crypto/zinc/selftest/chacha20.c
Normal file
File diff suppressed because it is too large
Load Diff
8443
sys/dev/if_wg/module/crypto/zinc/selftest/chacha20poly1305.c
Normal file
8443
sys/dev/if_wg/module/crypto/zinc/selftest/chacha20poly1305.c
Normal file
File diff suppressed because it is too large
Load Diff
1315
sys/dev/if_wg/module/crypto/zinc/selftest/curve25519.c
Normal file
1315
sys/dev/if_wg/module/crypto/zinc/selftest/curve25519.c
Normal file
File diff suppressed because it is too large
Load Diff
1110
sys/dev/if_wg/module/crypto/zinc/selftest/poly1305.c
Normal file
1110
sys/dev/if_wg/module/crypto/zinc/selftest/poly1305.c
Normal file
File diff suppressed because it is too large
Load Diff
43
sys/dev/if_wg/module/crypto/zinc/selftest/run.h
Normal file
43
sys/dev/if_wg/module/crypto/zinc/selftest/run.h
Normal file
@ -0,0 +1,43 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _ZINC_SELFTEST_RUN_H
|
||||
#define _ZINC_SELFTEST_RUN_H
|
||||
|
||||
static inline bool selftest_run(const char *name, bool (*selftest)(void),
|
||||
bool *const nobs[], unsigned int nobs_len)
|
||||
{
|
||||
unsigned long set = 0, subset = 0, largest_subset = 0;
|
||||
unsigned int i;
|
||||
bool failed;
|
||||
|
||||
MPASS(nobs_len <= BITS_PER_LONG);
|
||||
failed = false;
|
||||
|
||||
for (i = 0; i < nobs_len; ++i)
|
||||
set |= ((unsigned long)*nobs[i]) << i;
|
||||
|
||||
do {
|
||||
for (i = 0; i < nobs_len; ++i)
|
||||
*nobs[i] = BIT(i) & subset;
|
||||
if (selftest())
|
||||
largest_subset = max(subset, largest_subset);
|
||||
else {
|
||||
failed = true;
|
||||
pr_err("%s self-test combination 0x%lx: FAIL\n", name,
|
||||
subset);
|
||||
}
|
||||
subset = (subset - set) & set;
|
||||
} while (subset);
|
||||
|
||||
for (i = 0; i < nobs_len; ++i)
|
||||
*nobs[i] = BIT(i) & largest_subset;
|
||||
|
||||
if (largest_subset == set && !failed && bootverbose)
|
||||
pr_info("%s self-tests: pass\n", name);
|
||||
|
||||
return !WARN_ON(largest_subset != set);
|
||||
}
|
||||
#endif
|
866
sys/dev/if_wg/module/curve25519.c
Normal file
866
sys/dev/if_wg/module/curve25519.c
Normal file
@ -0,0 +1,866 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR MIT
|
||||
/*
|
||||
* Copyright (C) 2015-2016 The fiat-crypto Authors.
|
||||
* Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*
|
||||
* This is a machine-generated formally verified implementation of Curve25519
|
||||
* ECDH from: <https://github.com/mit-plv/fiat-crypto>. Though originally
|
||||
* machine generated, it has been tweaked to be suitable for use in the kernel.
|
||||
* It is optimized for 32-bit machines and machines that cannot work efficiently
|
||||
* with 128-bit integer types.
|
||||
*/
|
||||
|
||||
|
||||
/* Added for compatibility */
|
||||
#include <sys/types.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/support.h>
|
||||
#include <crypto/curve25519.h>
|
||||
|
||||
/* fe means field element. Here the field is \Z/(2^255-19). An element t,
|
||||
* entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
|
||||
* t[3]+2^102 t[4]+...+2^230 t[9].
|
||||
* fe limbs are bounded by 1.125*2^26,1.125*2^25,1.125*2^26,1.125*2^25,etc.
|
||||
* Multiplication and carrying produce fe from fe_loose.
|
||||
*/
|
||||
typedef struct fe {
	u32 v[10];	/* ten limbs, least significant first */
} fe;
|
||||
|
||||
/* fe_loose limbs are bounded by 3.375*2^26,3.375*2^25,3.375*2^26,3.375*2^25,etc
|
||||
* Addition and subtraction produce fe_loose from (fe, fe).
|
||||
*/
|
||||
typedef struct fe_loose {
	u32 v[10];	/* ten limbs, least significant first */
} fe_loose;
|
||||
|
||||
/*
 * Unpack 32 little-endian bytes at @s into ten limbs of alternating
 * 26 and 25 bits.  The most significant bit of the input (bit 255) is
 * ignored.  All input words are loaded before any limb is stored.
 */
static __always_inline void fe_frombytes_impl(u32 h[10], const u8 *s)
{
	u32 w[8];
	unsigned int i;

	for (i = 0; i < 8; ++i)
		w[i] = get_unaligned_le32(s + 4 * i);

	/* Low 128 bits: limbs of 26, 25, 26, 25 and 26 bits. */
	h[0] = w[0] & 0x3ffffff;
	h[1] = (w[0] >> 26) | ((w[1] & 0x7ffff) << 6);
	h[2] = (w[1] >> 19) | ((w[2] & 0x1fff) << 13);
	h[3] = (w[2] >> 13) | ((w[3] & 0x3f) << 19);
	h[4] = w[3] >> 6;

	/* High 128 bits: limbs of 25, 26, 25, 26 and 25 bits. */
	h[5] = w[4] & 0x1ffffff;
	h[6] = (w[4] >> 25) | ((w[5] & 0x7ffff) << 7);
	h[7] = (w[5] >> 19) | ((w[6] & 0xfff) << 13);
	h[8] = (w[6] >> 12) | ((w[7] & 0x3f) << 20);
	h[9] = (w[7] >> 6) & 0x1ffffff;	/* drops bit 255 */
}
|
||||
|
||||
/* Typed wrapper: decode the 32 bytes at @s into the field element @h. */
static __always_inline void fe_frombytes(fe *h, const u8 *s)
{
	fe_frombytes_impl(&h->v[0], s);
}
|
||||
|
||||
static __always_inline u8 /*bool*/
|
||||
addcarryx_u25(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
|
||||
{
|
||||
/* This function extracts 25 bits of result and 1 bit of carry
|
||||
* (26 total), so a 32-bit intermediate is sufficient.
|
||||
*/
|
||||
u32 x = a + b + c;
|
||||
*low = x & ((1 << 25) - 1);
|
||||
return (x >> 25) & 1;
|
||||
}
|
||||
|
||||
static __always_inline u8 /*bool*/
|
||||
addcarryx_u26(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
|
||||
{
|
||||
/* This function extracts 26 bits of result and 1 bit of carry
|
||||
* (27 total), so a 32-bit intermediate is sufficient.
|
||||
*/
|
||||
u32 x = a + b + c;
|
||||
*low = x & ((1 << 26) - 1);
|
||||
return (x >> 26) & 1;
|
||||
}
|
||||
|
||||
static __always_inline u8 /*bool*/
|
||||
subborrow_u25(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
|
||||
{
|
||||
/* This function extracts 25 bits of result and 1 bit of borrow
|
||||
* (26 total), so a 32-bit intermediate is sufficient.
|
||||
*/
|
||||
u32 x = a - b - c;
|
||||
*low = x & ((1 << 25) - 1);
|
||||
return x >> 31;
|
||||
}
|
||||
|
||||
static __always_inline u8 /*bool*/
|
||||
subborrow_u26(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
|
||||
{
|
||||
/* This function extracts 26 bits of result and 1 bit of borrow
|
||||
*(27 total), so a 32-bit intermediate is sufficient.
|
||||
*/
|
||||
u32 x = a - b - c;
|
||||
*low = x & ((1 << 26) - 1);
|
||||
return x >> 31;
|
||||
}
|
||||
|
||||
/*
 * Branch-free select: returns @nz when @t is non-zero and @z when @t is
 * zero, by expanding the truth value of @t into an all-ones/all-zeros mask.
 */
static __always_inline u32 cmovznz32(u32 t, u32 z, u32 nz)
{
	const u32 pick_nz = (u32)0 - (u32)(t != 0);

	return (pick_nz & nz) | (~pick_nz & z);
}
|
||||
|
||||
/*
 * Conditionally subtract the field prime from in1.
 *
 * The limbs of p = 2^255 - 19 are subtracted with a borrow chain that
 * alternates 26- and 25-bit limbs.  If the chain ends with a borrow, p is
 * added back limb by limb (selected through an all-ones mask rather than a
 * branch); otherwise zero is added.  The final carry is discarded.  Every
 * output limb is masked to its 26- or 25-bit width.  in1 is fully consumed
 * before out is written.
 */
static __always_inline void fe_freeze(u32 out[10], const u32 in1[10])
{
	static const u32 p_limb[10] = {
		0x3ffffed, 0x1ffffff, 0x3ffffff, 0x1ffffff, 0x3ffffff,
		0x1ffffff, 0x3ffffff, 0x1ffffff, 0x3ffffff, 0x1ffffff
	};
	u32 diff[10], frozen[10];
	u32 addback;
	u8 /*bool*/ flag = 0;
	unsigned int i;

	/* diff = in1 - p, tracking the borrow across limbs. */
	for (i = 0; i < 10; ++i)
		flag = (i & 1) ?
		    subborrow_u25(flag, in1[i], p_limb[i], &diff[i]) :
		    subborrow_u26(flag, in1[i], p_limb[i], &diff[i]);

	/* All ones if the subtraction borrowed, zero otherwise. */
	addback = cmovznz32(flag, 0x0, 0xffffffff);

	/* frozen = diff + (addback ? p : 0). */
	flag = 0;
	for (i = 0; i < 10; ++i)
		flag = (i & 1) ?
		    addcarryx_u25(flag, diff[i], addback & p_limb[i], &frozen[i]) :
		    addcarryx_u26(flag, diff[i], addback & p_limb[i], &frozen[i]);

	for (i = 0; i < 10; ++i)
		out[i] = frozen[i];
}
|
||||
|
||||
/*
 * Serialise @f into 32 little-endian bytes.
 *
 * The element is first passed through fe_freeze(), whose output limbs are
 * masked to 26/25 bits.  The limbs are then concatenated, least significant
 * first, into a bit stream that is emitted one byte at a time.
 */
static __always_inline void fe_tobytes(u8 s[32], const fe *f)
{
	u32 limb[10];
	u64 window = 0;			/* bits gathered but not yet written */
	unsigned int pending = 0;	/* number of valid bits in window */
	unsigned int i, n = 0;

	fe_freeze(limb, f->v);

	for (i = 0; i < 10; ++i) {
		window |= (u64)limb[i] << pending;
		pending += (i & 1) ? 25 : 26;
		while (pending >= 8) {
			s[n++] = (u8)window;
			window >>= 8;
			pending -= 8;
		}
	}

	/* 255 bits in total: 31 whole bytes above, 7 bits remain for s[31]. */
	s[n] = (u8)window;
}
|
||||
|
||||
/* h = f */
|
||||
static __always_inline void fe_copy(fe *h, const fe *f)
|
||||
{
|
||||
memmove(h, f, sizeof(u32) * 10);
|
||||
}
|
||||
|
||||
/* h = f, storing the ten limbs of a tight element into a loose one. */
static __always_inline void fe_copy_lt(fe_loose *h, const fe *f)
{
	memmove(h->v, f->v, sizeof(h->v));
}
|
||||
|
||||
/* h = 0 */
|
||||
static __always_inline void fe_0(fe *h)
|
||||
{
|
||||
memset(h, 0, sizeof(u32) * 10);
|
||||
}
|
||||
|
||||
/* h = 1 */
|
||||
static __always_inline void fe_1(fe *h)
|
||||
{
|
||||
memset(h, 0, sizeof(u32) * 10);
|
||||
h->v[0] = 1;
|
||||
}
|
||||
|
||||
/*
 * Limb-wise addition: out[i] = in1[i] + in2[i] with no carry propagation.
 * Every sum is formed before anything is stored, so out may alias either
 * input.
 */
static void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
{
	u32 sum[10];
	unsigned int i;

	for (i = 0; i < 10; ++i)
		sum[i] = in1[i] + in2[i];

	for (i = 0; i < 10; ++i)
		out[i] = sum[i];
}
|
||||
|
||||
/* h = f + g
|
||||
* Can overlap h with f or g.
|
||||
*/
|
||||
static __always_inline void fe_add(fe_loose *h, const fe *f, const fe *g)
|
||||
{
|
||||
fe_add_impl(h->v, f->v, g->v);
|
||||
}
|
||||
|
||||
/*
 * Limb-wise subtraction with a bias: out[i] = (bias[i] + in1[i]) - in2[i],
 * where the bias limbs are twice the corresponding limbs of 2^255 - 19
 * (2 * 0x3ffffed = 0x7ffffda, 2 * 0x1ffffff = 0x3fffffe, ...).  No borrow
 * is propagated between limbs.  Every difference is formed before anything
 * is stored, so out may alias either input.
 */
static void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
{
	static const u32 bias[10] = {
		0x7ffffda, 0x3fffffe, 0x7fffffe, 0x3fffffe, 0x7fffffe,
		0x3fffffe, 0x7fffffe, 0x3fffffe, 0x7fffffe, 0x3fffffe
	};
	u32 diff[10];
	unsigned int i;

	for (i = 0; i < 10; ++i)
		diff[i] = (bias[i] + in1[i]) - in2[i];

	for (i = 0; i < 10; ++i)
		out[i] = diff[i];
}
|
||||
|
||||
/* h = f - g
|
||||
* Can overlap h with f or g.
|
||||
*/
|
||||
static __always_inline void fe_sub(fe_loose *h, const fe *f, const fe *g)
|
||||
{
|
||||
fe_sub_impl(h->v, f->v, g->v);
|
||||
}
|
||||
|
||||
/*
 * Multiply two ten-limb values: out = in1 * in2, reduced so that the result
 * again occupies ten limbs of alternating 26/25 bits.
 *
 * Structure of the generated code below:
 *   1. both inputs are copied into locals (so out may alias in1 or in2);
 *   2. x40..x58 are the 19 partial-product columns, accumulated in u64;
 *   3. columns x50..x58 are folded into x40..x48 multiplied by 19
 *      (computed as (x << 4) + (x << 1) + x);
 *   4. a carry chain splits the columns into 26/25-bit limbs;
 *   5. the carry out of the top limb is multiplied by 0x13 and added to
 *      limb 0, followed by one more carry step into limbs 1 and 2.
 */
static void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
|
||||
{
|
||||
{ const u32 x20 = in1[9];
|
||||
{ const u32 x21 = in1[8];
|
||||
{ const u32 x19 = in1[7];
|
||||
{ const u32 x17 = in1[6];
|
||||
{ const u32 x15 = in1[5];
|
||||
{ const u32 x13 = in1[4];
|
||||
{ const u32 x11 = in1[3];
|
||||
{ const u32 x9 = in1[2];
|
||||
{ const u32 x7 = in1[1];
|
||||
{ const u32 x5 = in1[0];
|
||||
{ const u32 x38 = in2[9];
|
||||
{ const u32 x39 = in2[8];
|
||||
{ const u32 x37 = in2[7];
|
||||
{ const u32 x35 = in2[6];
|
||||
{ const u32 x33 = in2[5];
|
||||
{ const u32 x31 = in2[4];
|
||||
{ const u32 x29 = in2[3];
|
||||
{ const u32 x27 = in2[2];
|
||||
{ const u32 x25 = in2[1];
|
||||
{ const u32 x23 = in2[0];
|
||||
/* Partial-product columns x40 (lowest) .. x58 (highest). */
{ u64 x40 = ((u64)x23 * x5);
|
||||
{ u64 x41 = (((u64)x23 * x7) + ((u64)x25 * x5));
|
||||
{ u64 x42 = ((((u64)(0x2 * x25) * x7) + ((u64)x23 * x9)) + ((u64)x27 * x5));
|
||||
{ u64 x43 = (((((u64)x25 * x9) + ((u64)x27 * x7)) + ((u64)x23 * x11)) + ((u64)x29 * x5));
|
||||
{ u64 x44 = (((((u64)x27 * x9) + (0x2 * (((u64)x25 * x11) + ((u64)x29 * x7)))) + ((u64)x23 * x13)) + ((u64)x31 * x5));
|
||||
{ u64 x45 = (((((((u64)x27 * x11) + ((u64)x29 * x9)) + ((u64)x25 * x13)) + ((u64)x31 * x7)) + ((u64)x23 * x15)) + ((u64)x33 * x5));
|
||||
{ u64 x46 = (((((0x2 * ((((u64)x29 * x11) + ((u64)x25 * x15)) + ((u64)x33 * x7))) + ((u64)x27 * x13)) + ((u64)x31 * x9)) + ((u64)x23 * x17)) + ((u64)x35 * x5));
|
||||
{ u64 x47 = (((((((((u64)x29 * x13) + ((u64)x31 * x11)) + ((u64)x27 * x15)) + ((u64)x33 * x9)) + ((u64)x25 * x17)) + ((u64)x35 * x7)) + ((u64)x23 * x19)) + ((u64)x37 * x5));
|
||||
{ u64 x48 = (((((((u64)x31 * x13) + (0x2 * (((((u64)x29 * x15) + ((u64)x33 * x11)) + ((u64)x25 * x19)) + ((u64)x37 * x7)))) + ((u64)x27 * x17)) + ((u64)x35 * x9)) + ((u64)x23 * x21)) + ((u64)x39 * x5));
|
||||
{ u64 x49 = (((((((((((u64)x31 * x15) + ((u64)x33 * x13)) + ((u64)x29 * x17)) + ((u64)x35 * x11)) + ((u64)x27 * x19)) + ((u64)x37 * x9)) + ((u64)x25 * x21)) + ((u64)x39 * x7)) + ((u64)x23 * x20)) + ((u64)x38 * x5));
|
||||
{ u64 x50 = (((((0x2 * ((((((u64)x33 * x15) + ((u64)x29 * x19)) + ((u64)x37 * x11)) + ((u64)x25 * x20)) + ((u64)x38 * x7))) + ((u64)x31 * x17)) + ((u64)x35 * x13)) + ((u64)x27 * x21)) + ((u64)x39 * x9));
|
||||
{ u64 x51 = (((((((((u64)x33 * x17) + ((u64)x35 * x15)) + ((u64)x31 * x19)) + ((u64)x37 * x13)) + ((u64)x29 * x21)) + ((u64)x39 * x11)) + ((u64)x27 * x20)) + ((u64)x38 * x9));
|
||||
{ u64 x52 = (((((u64)x35 * x17) + (0x2 * (((((u64)x33 * x19) + ((u64)x37 * x15)) + ((u64)x29 * x20)) + ((u64)x38 * x11)))) + ((u64)x31 * x21)) + ((u64)x39 * x13));
|
||||
{ u64 x53 = (((((((u64)x35 * x19) + ((u64)x37 * x17)) + ((u64)x33 * x21)) + ((u64)x39 * x15)) + ((u64)x31 * x20)) + ((u64)x38 * x13));
|
||||
{ u64 x54 = (((0x2 * ((((u64)x37 * x19) + ((u64)x33 * x20)) + ((u64)x38 * x15))) + ((u64)x35 * x21)) + ((u64)x39 * x17));
|
||||
{ u64 x55 = (((((u64)x37 * x21) + ((u64)x39 * x19)) + ((u64)x35 * x20)) + ((u64)x38 * x17));
|
||||
{ u64 x56 = (((u64)x39 * x21) + (0x2 * (((u64)x37 * x20) + ((u64)x38 * x19))));
|
||||
{ u64 x57 = (((u64)x39 * x20) + ((u64)x38 * x21));
|
||||
{ u64 x58 = ((u64)(0x2 * x38) * x20);
|
||||
/* Fold columns x50..x58 into x40..x48, each scaled by 16 + 2 + 1 = 19. */
{ u64 x59 = (x48 + (x58 << 0x4));
|
||||
{ u64 x60 = (x59 + (x58 << 0x1));
|
||||
{ u64 x61 = (x60 + x58);
|
||||
{ u64 x62 = (x47 + (x57 << 0x4));
|
||||
{ u64 x63 = (x62 + (x57 << 0x1));
|
||||
{ u64 x64 = (x63 + x57);
|
||||
{ u64 x65 = (x46 + (x56 << 0x4));
|
||||
{ u64 x66 = (x65 + (x56 << 0x1));
|
||||
{ u64 x67 = (x66 + x56);
|
||||
{ u64 x68 = (x45 + (x55 << 0x4));
|
||||
{ u64 x69 = (x68 + (x55 << 0x1));
|
||||
{ u64 x70 = (x69 + x55);
|
||||
{ u64 x71 = (x44 + (x54 << 0x4));
|
||||
{ u64 x72 = (x71 + (x54 << 0x1));
|
||||
{ u64 x73 = (x72 + x54);
|
||||
{ u64 x74 = (x43 + (x53 << 0x4));
|
||||
{ u64 x75 = (x74 + (x53 << 0x1));
|
||||
{ u64 x76 = (x75 + x53);
|
||||
{ u64 x77 = (x42 + (x52 << 0x4));
|
||||
{ u64 x78 = (x77 + (x52 << 0x1));
|
||||
{ u64 x79 = (x78 + x52);
|
||||
{ u64 x80 = (x41 + (x51 << 0x4));
|
||||
{ u64 x81 = (x80 + (x51 << 0x1));
|
||||
{ u64 x82 = (x81 + x51);
|
||||
{ u64 x83 = (x40 + (x50 << 0x4));
|
||||
{ u64 x84 = (x83 + (x50 << 0x1));
|
||||
{ u64 x85 = (x84 + x50);
|
||||
/* Carry chain: split each column into a 26- or 25-bit limb plus carry. */
{ u64 x86 = (x85 >> 0x1a);
|
||||
{ u32 x87 = ((u32)x85 & 0x3ffffff);
|
||||
{ u64 x88 = (x86 + x82);
|
||||
{ u64 x89 = (x88 >> 0x19);
|
||||
{ u32 x90 = ((u32)x88 & 0x1ffffff);
|
||||
{ u64 x91 = (x89 + x79);
|
||||
{ u64 x92 = (x91 >> 0x1a);
|
||||
{ u32 x93 = ((u32)x91 & 0x3ffffff);
|
||||
{ u64 x94 = (x92 + x76);
|
||||
{ u64 x95 = (x94 >> 0x19);
|
||||
{ u32 x96 = ((u32)x94 & 0x1ffffff);
|
||||
{ u64 x97 = (x95 + x73);
|
||||
{ u64 x98 = (x97 >> 0x1a);
|
||||
{ u32 x99 = ((u32)x97 & 0x3ffffff);
|
||||
{ u64 x100 = (x98 + x70);
|
||||
{ u64 x101 = (x100 >> 0x19);
|
||||
{ u32 x102 = ((u32)x100 & 0x1ffffff);
|
||||
{ u64 x103 = (x101 + x67);
|
||||
{ u64 x104 = (x103 >> 0x1a);
|
||||
{ u32 x105 = ((u32)x103 & 0x3ffffff);
|
||||
{ u64 x106 = (x104 + x64);
|
||||
{ u64 x107 = (x106 >> 0x19);
|
||||
{ u32 x108 = ((u32)x106 & 0x1ffffff);
|
||||
{ u64 x109 = (x107 + x61);
|
||||
{ u64 x110 = (x109 >> 0x1a);
|
||||
{ u32 x111 = ((u32)x109 & 0x3ffffff);
|
||||
{ u64 x112 = (x110 + x49);
|
||||
{ u64 x113 = (x112 >> 0x19);
|
||||
{ u32 x114 = ((u32)x112 & 0x1ffffff);
|
||||
/* Wrap the top carry around into limb 0 (times 0x13), then carry once more. */
{ u64 x115 = (x87 + (0x13 * x113));
|
||||
{ u32 x116 = (u32) (x115 >> 0x1a);
|
||||
{ u32 x117 = ((u32)x115 & 0x3ffffff);
|
||||
{ u32 x118 = (x116 + x90);
|
||||
{ u32 x119 = (x118 >> 0x19);
|
||||
{ u32 x120 = (x118 & 0x1ffffff);
|
||||
out[0] = x117;
|
||||
out[1] = x120;
|
||||
/* Limb 2 absorbs the last carry without being re-masked. */
out[2] = (x119 + x93);
|
||||
out[3] = x96;
|
||||
out[4] = x99;
|
||||
out[5] = x102;
|
||||
out[6] = x105;
|
||||
out[7] = x108;
|
||||
out[8] = x111;
|
||||
out[9] = x114;
|
||||
}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
|
||||
}
|
||||
|
||||
/* h = f * g with two tight operands and a tight result. */
static __always_inline void fe_mul_ttt(fe *h, const fe *f, const fe *g)
{
	fe_mul_impl(&h->v[0], &f->v[0], &g->v[0]);
}
|
||||
|
||||
/* h = f * g with a loose first operand, a tight second one, tight result. */
static __always_inline void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g)
{
	fe_mul_impl(&h->v[0], &f->v[0], &g->v[0]);
}
|
||||
|
||||
static __always_inline void
|
||||
fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g)
|
||||
{
|
||||
fe_mul_impl(h->v, f->v, g->v);
|
||||
}
|
||||
|
||||
/*
 * Square a ten-limb value: out = in1 * in1, reduced so that the result
 * again occupies ten limbs of alternating 26/25 bits.
 *
 * Structure of the generated code below:
 *   1. the input is copied into locals (so out may alias in1);
 *   2. x19..x37 are the 19 product columns, accumulated in u64, with
 *      symmetric cross terms merged into doubled products;
 *   3. columns x29..x37 are folded into x19..x27 multiplied by 19
 *      (computed as (x << 4) + (x << 1) + x);
 *   4. a carry chain splits the columns into 26/25-bit limbs;
 *   5. the carry out of the top limb is multiplied by 0x13 and added to
 *      limb 0, followed by one more carry step into limbs 1 and 2.
 */
static void fe_sqr_impl(u32 out[10], const u32 in1[10])
|
||||
{
|
||||
{ const u32 x17 = in1[9];
|
||||
{ const u32 x18 = in1[8];
|
||||
{ const u32 x16 = in1[7];
|
||||
{ const u32 x14 = in1[6];
|
||||
{ const u32 x12 = in1[5];
|
||||
{ const u32 x10 = in1[4];
|
||||
{ const u32 x8 = in1[3];
|
||||
{ const u32 x6 = in1[2];
|
||||
{ const u32 x4 = in1[1];
|
||||
{ const u32 x2 = in1[0];
|
||||
/* Product columns x19 (lowest) .. x37 (highest). */
{ u64 x19 = ((u64)x2 * x2);
|
||||
{ u64 x20 = ((u64)(0x2 * x2) * x4);
|
||||
{ u64 x21 = (0x2 * (((u64)x4 * x4) + ((u64)x2 * x6)));
|
||||
{ u64 x22 = (0x2 * (((u64)x4 * x6) + ((u64)x2 * x8)));
|
||||
{ u64 x23 = ((((u64)x6 * x6) + ((u64)(0x4 * x4) * x8)) + ((u64)(0x2 * x2) * x10));
|
||||
{ u64 x24 = (0x2 * ((((u64)x6 * x8) + ((u64)x4 * x10)) + ((u64)x2 * x12)));
|
||||
{ u64 x25 = (0x2 * (((((u64)x8 * x8) + ((u64)x6 * x10)) + ((u64)x2 * x14)) + ((u64)(0x2 * x4) * x12)));
|
||||
{ u64 x26 = (0x2 * (((((u64)x8 * x10) + ((u64)x6 * x12)) + ((u64)x4 * x14)) + ((u64)x2 * x16)));
|
||||
{ u64 x27 = (((u64)x10 * x10) + (0x2 * ((((u64)x6 * x14) + ((u64)x2 * x18)) + (0x2 * (((u64)x4 * x16) + ((u64)x8 * x12))))));
|
||||
{ u64 x28 = (0x2 * ((((((u64)x10 * x12) + ((u64)x8 * x14)) + ((u64)x6 * x16)) + ((u64)x4 * x18)) + ((u64)x2 * x17)));
|
||||
{ u64 x29 = (0x2 * (((((u64)x12 * x12) + ((u64)x10 * x14)) + ((u64)x6 * x18)) + (0x2 * (((u64)x8 * x16) + ((u64)x4 * x17)))));
|
||||
{ u64 x30 = (0x2 * (((((u64)x12 * x14) + ((u64)x10 * x16)) + ((u64)x8 * x18)) + ((u64)x6 * x17)));
|
||||
{ u64 x31 = (((u64)x14 * x14) + (0x2 * (((u64)x10 * x18) + (0x2 * (((u64)x12 * x16) + ((u64)x8 * x17))))));
|
||||
{ u64 x32 = (0x2 * ((((u64)x14 * x16) + ((u64)x12 * x18)) + ((u64)x10 * x17)));
|
||||
{ u64 x33 = (0x2 * ((((u64)x16 * x16) + ((u64)x14 * x18)) + ((u64)(0x2 * x12) * x17)));
|
||||
{ u64 x34 = (0x2 * (((u64)x16 * x18) + ((u64)x14 * x17)));
|
||||
{ u64 x35 = (((u64)x18 * x18) + ((u64)(0x4 * x16) * x17));
|
||||
{ u64 x36 = ((u64)(0x2 * x18) * x17);
|
||||
{ u64 x37 = ((u64)(0x2 * x17) * x17);
|
||||
/* Fold columns x29..x37 into x19..x27, each scaled by 16 + 2 + 1 = 19. */
{ u64 x38 = (x27 + (x37 << 0x4));
|
||||
{ u64 x39 = (x38 + (x37 << 0x1));
|
||||
{ u64 x40 = (x39 + x37);
|
||||
{ u64 x41 = (x26 + (x36 << 0x4));
|
||||
{ u64 x42 = (x41 + (x36 << 0x1));
|
||||
{ u64 x43 = (x42 + x36);
|
||||
{ u64 x44 = (x25 + (x35 << 0x4));
|
||||
{ u64 x45 = (x44 + (x35 << 0x1));
|
||||
{ u64 x46 = (x45 + x35);
|
||||
{ u64 x47 = (x24 + (x34 << 0x4));
|
||||
{ u64 x48 = (x47 + (x34 << 0x1));
|
||||
{ u64 x49 = (x48 + x34);
|
||||
{ u64 x50 = (x23 + (x33 << 0x4));
|
||||
{ u64 x51 = (x50 + (x33 << 0x1));
|
||||
{ u64 x52 = (x51 + x33);
|
||||
{ u64 x53 = (x22 + (x32 << 0x4));
|
||||
{ u64 x54 = (x53 + (x32 << 0x1));
|
||||
{ u64 x55 = (x54 + x32);
|
||||
{ u64 x56 = (x21 + (x31 << 0x4));
|
||||
{ u64 x57 = (x56 + (x31 << 0x1));
|
||||
{ u64 x58 = (x57 + x31);
|
||||
{ u64 x59 = (x20 + (x30 << 0x4));
|
||||
{ u64 x60 = (x59 + (x30 << 0x1));
|
||||
{ u64 x61 = (x60 + x30);
|
||||
{ u64 x62 = (x19 + (x29 << 0x4));
|
||||
{ u64 x63 = (x62 + (x29 << 0x1));
|
||||
{ u64 x64 = (x63 + x29);
|
||||
/* Carry chain: split each column into a 26- or 25-bit limb plus carry. */
{ u64 x65 = (x64 >> 0x1a);
|
||||
{ u32 x66 = ((u32)x64 & 0x3ffffff);
|
||||
{ u64 x67 = (x65 + x61);
|
||||
{ u64 x68 = (x67 >> 0x19);
|
||||
{ u32 x69 = ((u32)x67 & 0x1ffffff);
|
||||
{ u64 x70 = (x68 + x58);
|
||||
{ u64 x71 = (x70 >> 0x1a);
|
||||
{ u32 x72 = ((u32)x70 & 0x3ffffff);
|
||||
{ u64 x73 = (x71 + x55);
|
||||
{ u64 x74 = (x73 >> 0x19);
|
||||
{ u32 x75 = ((u32)x73 & 0x1ffffff);
|
||||
{ u64 x76 = (x74 + x52);
|
||||
{ u64 x77 = (x76 >> 0x1a);
|
||||
{ u32 x78 = ((u32)x76 & 0x3ffffff);
|
||||
{ u64 x79 = (x77 + x49);
|
||||
{ u64 x80 = (x79 >> 0x19);
|
||||
{ u32 x81 = ((u32)x79 & 0x1ffffff);
|
||||
{ u64 x82 = (x80 + x46);
|
||||
{ u64 x83 = (x82 >> 0x1a);
|
||||
{ u32 x84 = ((u32)x82 & 0x3ffffff);
|
||||
{ u64 x85 = (x83 + x43);
|
||||
{ u64 x86 = (x85 >> 0x19);
|
||||
{ u32 x87 = ((u32)x85 & 0x1ffffff);
|
||||
{ u64 x88 = (x86 + x40);
|
||||
{ u64 x89 = (x88 >> 0x1a);
|
||||
{ u32 x90 = ((u32)x88 & 0x3ffffff);
|
||||
{ u64 x91 = (x89 + x28);
|
||||
{ u64 x92 = (x91 >> 0x19);
|
||||
{ u32 x93 = ((u32)x91 & 0x1ffffff);
|
||||
/* Wrap the top carry around into limb 0 (times 0x13), then carry once more. */
{ u64 x94 = (x66 + (0x13 * x92));
|
||||
{ u32 x95 = (u32) (x94 >> 0x1a);
|
||||
{ u32 x96 = ((u32)x94 & 0x3ffffff);
|
||||
{ u32 x97 = (x95 + x69);
|
||||
{ u32 x98 = (x97 >> 0x19);
|
||||
{ u32 x99 = (x97 & 0x1ffffff);
|
||||
out[0] = x96;
|
||||
out[1] = x99;
|
||||
/* Limb 2 absorbs the last carry without being re-masked. */
out[2] = (x98 + x72);
|
||||
out[3] = x75;
|
||||
out[4] = x78;
|
||||
out[5] = x81;
|
||||
out[6] = x84;
|
||||
out[7] = x87;
|
||||
out[8] = x90;
|
||||
out[9] = x93;
|
||||
}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
|
||||
}
|
||||
|
||||
/* h = f^2 with a loose operand and a tight result. */
static __always_inline void fe_sq_tl(fe *h, const fe_loose *f)
{
	fe_sqr_impl(&h->v[0], &f->v[0]);
}
|
||||
|
||||
/* h = f^2 with a tight operand and a tight result. */
static __always_inline void fe_sq_tt(fe *h, const fe *f)
{
	fe_sqr_impl(&h->v[0], &f->v[0]);
}
|
||||
|
||||
/* Square *h in place @times times, i.e. raise it to the power 2^times. */
static __always_inline void fe_sq_tt_times(fe *h, unsigned int times)
{
	while (times-- != 0)
		fe_sq_tt(h, h);
}

/*
 * out = z^(2^255 - 21), computed with a fixed chain of squarings and
 * multiplications.  The exponent equals (2^255 - 19) - 2.  The comment on
 * each step gives the power of z held by the destination afterwards.
 */
static __always_inline void fe_loose_invert(fe *out, const fe_loose *z)
{
	fe lo;		/* ends up holding z^11 */
	fe run;		/* running z^(2^k - 1) values */
	fe acc;
	fe wide;

	fe_sq_tl(&lo, z);			/* z^2 */
	fe_sq_tt(&run, &lo);			/* z^4 */
	fe_sq_tt_times(&run, 1);		/* z^8 */
	fe_mul_tlt(&run, z, &run);		/* z^9 */
	fe_mul_ttt(&lo, &lo, &run);		/* z^11 */
	fe_sq_tt(&acc, &lo);			/* z^22 */
	fe_mul_ttt(&run, &run, &acc);		/* z^(2^5 - 1) */

	fe_sq_tt(&acc, &run);
	fe_sq_tt_times(&acc, 4);		/* z^(2^10 - 2^5) */
	fe_mul_ttt(&run, &acc, &run);		/* z^(2^10 - 1) */

	fe_sq_tt(&acc, &run);
	fe_sq_tt_times(&acc, 9);		/* z^(2^20 - 2^10) */
	fe_mul_ttt(&acc, &acc, &run);		/* z^(2^20 - 1) */

	fe_sq_tt(&wide, &acc);
	fe_sq_tt_times(&wide, 19);		/* z^(2^40 - 2^20) */
	fe_mul_ttt(&acc, &wide, &acc);		/* z^(2^40 - 1) */

	fe_sq_tt_times(&acc, 10);		/* z^(2^50 - 2^10) */
	fe_mul_ttt(&run, &acc, &run);		/* z^(2^50 - 1) */

	fe_sq_tt(&acc, &run);
	fe_sq_tt_times(&acc, 49);		/* z^(2^100 - 2^50) */
	fe_mul_ttt(&acc, &acc, &run);		/* z^(2^100 - 1) */

	fe_sq_tt(&wide, &acc);
	fe_sq_tt_times(&wide, 99);		/* z^(2^200 - 2^100) */
	fe_mul_ttt(&acc, &wide, &acc);		/* z^(2^200 - 1) */

	fe_sq_tt_times(&acc, 50);		/* z^(2^250 - 2^50) */
	fe_mul_ttt(&run, &acc, &run);		/* z^(2^250 - 1) */

	fe_sq_tt_times(&run, 5);		/* z^(2^255 - 2^5) */
	fe_mul_ttt(out, &run, &lo);		/* z^(2^255 - 21) */
}
|
||||
|
||||
/*
 * Tight-input front end for fe_loose_invert(): the operand is copied into a
 * loose temporary and inverted from there.
 */
static __always_inline void fe_invert(fe *out, const fe *z)
{
	fe_loose widened;

	fe_copy_lt(&widened, z);
	fe_loose_invert(out, &widened);
}
|
||||
|
||||
/* Replace (f,g) with (g,f) if b == 1;
|
||||
* replace (f,g) with (f,g) if b == 0.
|
||||
*
|
||||
* Preconditions: b in {0,1}
|
||||
*/
|
||||
static __always_inline void fe_cswap(fe *f, fe *g, unsigned int b)
|
||||
{
|
||||
unsigned i;
|
||||
b = 0 - b;
|
||||
for (i = 0; i < 10; i++) {
|
||||
u32 x = f->v[i] ^ g->v[i];
|
||||
x &= b;
|
||||
f->v[i] ^= x;
|
||||
g->v[i] ^= x;
|
||||
}
|
||||
}
|
||||
|
||||
/* NOTE: based on fiat-crypto fe_mul, edited for in2=121666, 0, 0.*/
|
||||
static __always_inline void fe_mul_121666_impl(u32 out[10], const u32 in1[10])
|
||||
{
|
||||
{ const u32 x20 = in1[9];
|
||||
{ const u32 x21 = in1[8];
|
||||
{ const u32 x19 = in1[7];
|
||||
{ const u32 x17 = in1[6];
|
||||
{ const u32 x15 = in1[5];
|
||||
{ const u32 x13 = in1[4];
|
||||
{ const u32 x11 = in1[3];
|
||||
{ const u32 x9 = in1[2];
|
||||
{ const u32 x7 = in1[1];
|
||||
{ const u32 x5 = in1[0];
|
||||
{ const u32 x38 = 0;
|
||||
{ const u32 x39 = 0;
|
||||
{ const u32 x37 = 0;
|
||||
{ const u32 x35 = 0;
|
||||
{ const u32 x33 = 0;
|
||||
{ const u32 x31 = 0;
|
||||
{ const u32 x29 = 0;
|
||||
{ const u32 x27 = 0;
|
||||
{ const u32 x25 = 0;
|
||||
{ const u32 x23 = 121666;
|
||||
{ u64 x40 = ((u64)x23 * x5);
|
||||
{ u64 x41 = (((u64)x23 * x7) + ((u64)x25 * x5));
|
||||
{ u64 x42 = ((((u64)(0x2 * x25) * x7) + ((u64)x23 * x9)) + ((u64)x27 * x5));
|
||||
{ u64 x43 = (((((u64)x25 * x9) + ((u64)x27 * x7)) + ((u64)x23 * x11)) + ((u64)x29 * x5));
|
||||
{ u64 x44 = (((((u64)x27 * x9) + (0x2 * (((u64)x25 * x11) + ((u64)x29 * x7)))) + ((u64)x23 * x13)) + ((u64)x31 * x5));
|
||||
{ u64 x45 = (((((((u64)x27 * x11) + ((u64)x29 * x9)) + ((u64)x25 * x13)) + ((u64)x31 * x7)) + ((u64)x23 * x15)) + ((u64)x33 * x5));
|
||||
{ u64 x46 = (((((0x2 * ((((u64)x29 * x11) + ((u64)x25 * x15)) + ((u64)x33 * x7))) + ((u64)x27 * x13)) + ((u64)x31 * x9)) + ((u64)x23 * x17)) + ((u64)x35 * x5));
|
||||
{ u64 x47 = (((((((((u64)x29 * x13) + ((u64)x31 * x11)) + ((u64)x27 * x15)) + ((u64)x33 * x9)) + ((u64)x25 * x17)) + ((u64)x35 * x7)) + ((u64)x23 * x19)) + ((u64)x37 * x5));
|
||||
{ u64 x48 = (((((((u64)x31 * x13) + (0x2 * (((((u64)x29 * x15) + ((u64)x33 * x11)) + ((u64)x25 * x19)) + ((u64)x37 * x7)))) + ((u64)x27 * x17)) + ((u64)x35 * x9)) + ((u64)x23 * x21)) + ((u64)x39 * x5));
|
||||
{ u64 x49 = (((((((((((u64)x31 * x15) + ((u64)x33 * x13)) + ((u64)x29 * x17)) + ((u64)x35 * x11)) + ((u64)x27 * x19)) + ((u64)x37 * x9)) + ((u64)x25 * x21)) + ((u64)x39 * x7)) + ((u64)x23 * x20)) + ((u64)x38 * x5));
|
||||
{ u64 x50 = (((((0x2 * ((((((u64)x33 * x15) + ((u64)x29 * x19)) + ((u64)x37 * x11)) + ((u64)x25 * x20)) + ((u64)x38 * x7))) + ((u64)x31 * x17)) + ((u64)x35 * x13)) + ((u64)x27 * x21)) + ((u64)x39 * x9));
|
||||
{ u64 x51 = (((((((((u64)x33 * x17) + ((u64)x35 * x15)) + ((u64)x31 * x19)) + ((u64)x37 * x13)) + ((u64)x29 * x21)) + ((u64)x39 * x11)) + ((u64)x27 * x20)) + ((u64)x38 * x9));
|
||||
{ u64 x52 = (((((u64)x35 * x17) + (0x2 * (((((u64)x33 * x19) + ((u64)x37 * x15)) + ((u64)x29 * x20)) + ((u64)x38 * x11)))) + ((u64)x31 * x21)) + ((u64)x39 * x13));
|
||||
{ u64 x53 = (((((((u64)x35 * x19) + ((u64)x37 * x17)) + ((u64)x33 * x21)) + ((u64)x39 * x15)) + ((u64)x31 * x20)) + ((u64)x38 * x13));
|
||||
{ u64 x54 = (((0x2 * ((((u64)x37 * x19) + ((u64)x33 * x20)) + ((u64)x38 * x15))) + ((u64)x35 * x21)) + ((u64)x39 * x17));
|
||||
{ u64 x55 = (((((u64)x37 * x21) + ((u64)x39 * x19)) + ((u64)x35 * x20)) + ((u64)x38 * x17));
|
||||
{ u64 x56 = (((u64)x39 * x21) + (0x2 * (((u64)x37 * x20) + ((u64)x38 * x19))));
|
||||
{ u64 x57 = (((u64)x39 * x20) + ((u64)x38 * x21));
|
||||
{ u64 x58 = ((u64)(0x2 * x38) * x20);
|
||||
{ u64 x59 = (x48 + (x58 << 0x4));
|
||||
{ u64 x60 = (x59 + (x58 << 0x1));
|
||||
{ u64 x61 = (x60 + x58);
|
||||
{ u64 x62 = (x47 + (x57 << 0x4));
|
||||
{ u64 x63 = (x62 + (x57 << 0x1));
|
||||
{ u64 x64 = (x63 + x57);
|
||||
{ u64 x65 = (x46 + (x56 << 0x4));
|
||||
{ u64 x66 = (x65 + (x56 << 0x1));
|
||||
{ u64 x67 = (x66 + x56);
|
||||
{ u64 x68 = (x45 + (x55 << 0x4));
|
||||
{ u64 x69 = (x68 + (x55 << 0x1));
|
||||
{ u64 x70 = (x69 + x55);
|
||||
{ u64 x71 = (x44 + (x54 << 0x4));
|
||||
{ u64 x72 = (x71 + (x54 << 0x1));
|
||||
{ u64 x73 = (x72 + x54);
|
||||
{ u64 x74 = (x43 + (x53 << 0x4));
|
||||
{ u64 x75 = (x74 + (x53 << 0x1));
|
||||
{ u64 x76 = (x75 + x53);
|
||||
{ u64 x77 = (x42 + (x52 << 0x4));
|
||||
{ u64 x78 = (x77 + (x52 << 0x1));
|
||||
{ u64 x79 = (x78 + x52);
|
||||
{ u64 x80 = (x41 + (x51 << 0x4));
|
||||
{ u64 x81 = (x80 + (x51 << 0x1));
|
||||
{ u64 x82 = (x81 + x51);
|
||||
{ u64 x83 = (x40 + (x50 << 0x4));
|
||||
{ u64 x84 = (x83 + (x50 << 0x1));
|
||||
{ u64 x85 = (x84 + x50);
|
||||
{ u64 x86 = (x85 >> 0x1a);
|
||||
{ u32 x87 = ((u32)x85 & 0x3ffffff);
|
||||
{ u64 x88 = (x86 + x82);
|
||||
{ u64 x89 = (x88 >> 0x19);
|
||||
{ u32 x90 = ((u32)x88 & 0x1ffffff);
|
||||
{ u64 x91 = (x89 + x79);
|
||||
{ u64 x92 = (x91 >> 0x1a);
|
||||
{ u32 x93 = ((u32)x91 & 0x3ffffff);
|
||||
{ u64 x94 = (x92 + x76);
|
||||
{ u64 x95 = (x94 >> 0x19);
|
||||
{ u32 x96 = ((u32)x94 & 0x1ffffff);
|
||||
{ u64 x97 = (x95 + x73);
|
||||
{ u64 x98 = (x97 >> 0x1a);
|
||||
{ u32 x99 = ((u32)x97 & 0x3ffffff);
|
||||
{ u64 x100 = (x98 + x70);
|
||||
{ u64 x101 = (x100 >> 0x19);
|
||||
{ u32 x102 = ((u32)x100 & 0x1ffffff);
|
||||
{ u64 x103 = (x101 + x67);
|
||||
{ u64 x104 = (x103 >> 0x1a);
|
||||
{ u32 x105 = ((u32)x103 & 0x3ffffff);
|
||||
{ u64 x106 = (x104 + x64);
|
||||
{ u64 x107 = (x106 >> 0x19);
|
||||
{ u32 x108 = ((u32)x106 & 0x1ffffff);
|
||||
{ u64 x109 = (x107 + x61);
|
||||
{ u64 x110 = (x109 >> 0x1a);
|
||||
{ u32 x111 = ((u32)x109 & 0x3ffffff);
|
||||
{ u64 x112 = (x110 + x49);
|
||||
{ u64 x113 = (x112 >> 0x19);
|
||||
{ u32 x114 = ((u32)x112 & 0x1ffffff);
|
||||
{ u64 x115 = (x87 + (0x13 * x113));
|
||||
{ u32 x116 = (u32) (x115 >> 0x1a);
|
||||
{ u32 x117 = ((u32)x115 & 0x3ffffff);
|
||||
{ u32 x118 = (x116 + x90);
|
||||
{ u32 x119 = (x118 >> 0x19);
|
||||
{ u32 x120 = (x118 & 0x1ffffff);
|
||||
out[0] = x117;
|
||||
out[1] = x120;
|
||||
out[2] = (x119 + x93);
|
||||
out[3] = x96;
|
||||
out[4] = x99;
|
||||
out[5] = x102;
|
||||
out[6] = x105;
|
||||
out[7] = x108;
|
||||
out[8] = x111;
|
||||
out[9] = x114;
|
||||
}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
|
||||
}
|
||||
|
||||
/*
 * Thin typed wrapper around fe_mul_121666_impl(): passes the limb array of
 * the tight element *h as the first argument and the limb array of the
 * loose element *f as the second.
 * NOTE(review): presumably (output, input) order, i.e. h = 121666 * f as the
 * name suggests -- confirm against the fe_mul_121666_impl() prototype.
 */
static __always_inline void fe_mul121666(fe *h, const fe_loose *f)
|
||||
{
|
||||
fe_mul_121666_impl(h->v, f->v);
|
||||
}
|
||||
|
||||
void curve25519_generic(u8 out[CURVE25519_KEY_SIZE],
|
||||
const u8 scalar[CURVE25519_KEY_SIZE],
|
||||
const u8 point[CURVE25519_KEY_SIZE])
|
||||
{
|
||||
fe x1, x2, z2, x3, z3;
|
||||
fe_loose x2l, z2l, x3l;
|
||||
unsigned swap = 0;
|
||||
int pos;
|
||||
u8 e[32];
|
||||
|
||||
memcpy(e, scalar, 32);
|
||||
|
||||
/* The following implementation was transcribed to Coq and proven to
|
||||
* correspond to unary scalar multiplication in affine coordinates given
|
||||
* that x1 != 0 is the x coordinate of some point on the curve. It was
|
||||
* also checked in Coq that doing a ladderstep with x1 = x3 = 0 gives
|
||||
* z2' = z3' = 0, and z2 = z3 = 0 gives z2' = z3' = 0. The statement was
|
||||
* quantified over the underlying field, so it applies to Curve25519
|
||||
* itself and the quadratic twist of Curve25519. It was not proven in
|
||||
* Coq that prime-field arithmetic correctly simulates extension-field
|
||||
* arithmetic on prime-field values. The decoding of the byte array
|
||||
* representation of e was not considered.
|
||||
*
|
||||
* Specification of Montgomery curves in affine coordinates:
|
||||
* <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Spec/MontgomeryCurve.v#L27>
|
||||
*
|
||||
* Proof that these form a group that is isomorphic to a Weierstrass
|
||||
* curve:
|
||||
* <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/AffineProofs.v#L35>
|
||||
*
|
||||
* Coq transcription and correctness proof of the loop
|
||||
* (where scalarbits=255):
|
||||
* <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L118>
|
||||
* <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L278>
|
||||
* preconditions: 0 <= e < 2^255 (not necessarily e < order),
|
||||
* fe_invert(0) = 0
|
||||
*/
|
||||
fe_frombytes(&x1, point);
|
||||
fe_1(&x2);
|
||||
fe_0(&z2);
|
||||
fe_copy(&x3, &x1);
|
||||
fe_1(&z3);
|
||||
|
||||
for (pos = 254; pos >= 0; --pos) {
|
||||
fe tmp0, tmp1;
|
||||
fe_loose tmp0l, tmp1l;
|
||||
/* loop invariant as of right before the test, for the case
|
||||
* where x1 != 0:
|
||||
* pos >= -1; if z2 = 0 then x2 is nonzero; if z3 = 0 then x3
|
||||
* is nonzero
|
||||
* let r := e >> (pos+1) in the following equalities of
|
||||
* projective points:
|
||||
* to_xz (r*P) === if swap then (x3, z3) else (x2, z2)
|
||||
* to_xz ((r+1)*P) === if swap then (x2, z2) else (x3, z3)
|
||||
* x1 is the nonzero x coordinate of the nonzero
|
||||
* point (r*P-(r+1)*P)
|
||||
*/
|
||||
unsigned b = 1 & (e[pos / 8] >> (pos & 7));
|
||||
swap ^= b;
|
||||
fe_cswap(&x2, &x3, swap);
|
||||
fe_cswap(&z2, &z3, swap);
|
||||
swap = b;
|
||||
/* Coq transcription of ladderstep formula (called from
|
||||
* transcribed loop):
|
||||
* <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L89>
|
||||
* <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L131>
|
||||
* x1 != 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L217>
|
||||
* x1 = 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L147>
|
||||
*/
|
||||
fe_sub(&tmp0l, &x3, &z3);
|
||||
fe_sub(&tmp1l, &x2, &z2);
|
||||
fe_add(&x2l, &x2, &z2);
|
||||
fe_add(&z2l, &x3, &z3);
|
||||
fe_mul_tll(&z3, &tmp0l, &x2l);
|
||||
fe_mul_tll(&z2, &z2l, &tmp1l);
|
||||
fe_sq_tl(&tmp0, &tmp1l);
|
||||
fe_sq_tl(&tmp1, &x2l);
|
||||
fe_add(&x3l, &z3, &z2);
|
||||
fe_sub(&z2l, &z3, &z2);
|
||||
fe_mul_ttt(&x2, &tmp1, &tmp0);
|
||||
fe_sub(&tmp1l, &tmp1, &tmp0);
|
||||
fe_sq_tl(&z2, &z2l);
|
||||
fe_mul121666(&z3, &tmp1l);
|
||||
fe_sq_tl(&x3, &x3l);
|
||||
fe_add(&tmp0l, &tmp0, &z3);
|
||||
fe_mul_ttt(&z3, &x1, &z2);
|
||||
fe_mul_tll(&z2, &tmp1l, &tmp0l);
|
||||
}
|
||||
/* here pos=-1, so r=e, so to_xz (e*P) === if swap then (x3, z3)
|
||||
* else (x2, z2)
|
||||
*/
|
||||
fe_cswap(&x2, &x3, swap);
|
||||
fe_cswap(&z2, &z3, swap);
|
||||
|
||||
fe_invert(&z2, &z2);
|
||||
fe_mul_ttt(&x2, &x2, &z2);
|
||||
fe_tobytes(out, &x2);
|
||||
|
||||
memzero_explicit(&x1, sizeof(x1));
|
||||
memzero_explicit(&x2, sizeof(x2));
|
||||
memzero_explicit(&z2, sizeof(z2));
|
||||
memzero_explicit(&x3, sizeof(x3));
|
||||
memzero_explicit(&z3, sizeof(z3));
|
||||
memzero_explicit(&x2l, sizeof(x2l));
|
||||
memzero_explicit(&z2l, sizeof(z2l));
|
||||
memzero_explicit(&x3l, sizeof(x3l));
|
||||
memzero_explicit(&e, sizeof(e));
|
||||
}
|
1947
sys/dev/if_wg/module/if_wg_session.c
Normal file
1947
sys/dev/if_wg/module/if_wg_session.c
Normal file
File diff suppressed because it is too large
Load Diff
847
sys/dev/if_wg/module/module.c
Normal file
847
sys/dev/if_wg/module/module.c
Normal file
@ -0,0 +1,847 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
|
||||
*
|
||||
* Copyright (c) 2019-2020 Rubicon Communications, LLC (Netgate)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include "opt_inet.h"
|
||||
#include "opt_inet6.h"
|
||||
#include <sys/param.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/priv.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/mbuf.h>
|
||||
#include <sys/module.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/sockio.h>
|
||||
#include <sys/queue.h>
|
||||
#include <sys/smp.h>
|
||||
|
||||
#include <net/if.h>
|
||||
#include <net/ethernet.h>
|
||||
#include <net/if_var.h>
|
||||
#include <net/iflib.h>
|
||||
#include <net/if_clone.h>
|
||||
#include <net/radix.h>
|
||||
#include <net/bpf.h>
|
||||
#include <net/mp_ring.h>
|
||||
|
||||
#include "ifdi_if.h"
|
||||
|
||||
#include <sys/wg_module.h>
|
||||
#include <crypto/zinc.h>
|
||||
#include <sys/wg_noise.h>
|
||||
#include <sys/if_wg_session_vars.h>
|
||||
#include <sys/if_wg_session.h>
|
||||
|
||||
MALLOC_DEFINE(M_WG, "WG", "wireguard");
|
||||
|
||||
#define WG_CAPS IFCAP_LINKSTATE
|
||||
#define ph_family PH_loc.eight[5]
|
||||
|
||||
TASKQGROUP_DECLARE(if_io_tqg);
|
||||
|
||||
static int clone_count;
|
||||
uma_zone_t ratelimit_zone;
|
||||
|
||||
void
|
||||
wg_encrypt_dispatch(struct wg_softc *sc)
|
||||
{
|
||||
for (int i = 0; i < mp_ncpus; i++) {
|
||||
if (sc->sc_encrypt[i].gt_task.ta_flags & TASK_ENQUEUED)
|
||||
continue;
|
||||
GROUPTASK_ENQUEUE(&sc->sc_encrypt[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
wg_decrypt_dispatch(struct wg_softc *sc)
|
||||
{
|
||||
for (int i = 0; i < mp_ncpus; i++) {
|
||||
if (sc->sc_decrypt[i].gt_task.ta_flags & TASK_ENQUEUED)
|
||||
continue;
|
||||
GROUPTASK_ENQUEUE(&sc->sc_decrypt[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
crypto_taskq_setup(struct wg_softc *sc)
|
||||
{
|
||||
device_t dev = iflib_get_dev(sc->wg_ctx);
|
||||
|
||||
sc->sc_encrypt = malloc(sizeof(struct grouptask)*mp_ncpus, M_WG, M_WAITOK);
|
||||
sc->sc_decrypt = malloc(sizeof(struct grouptask)*mp_ncpus, M_WG, M_WAITOK);
|
||||
|
||||
for (int i = 0; i < mp_ncpus; i++) {
|
||||
GROUPTASK_INIT(&sc->sc_encrypt[i], 0,
|
||||
(gtask_fn_t *)wg_softc_encrypt, sc);
|
||||
taskqgroup_attach_cpu(qgroup_if_io_tqg, &sc->sc_encrypt[i], sc, i, dev, NULL, "wg encrypt");
|
||||
GROUPTASK_INIT(&sc->sc_decrypt[i], 0,
|
||||
(gtask_fn_t *)wg_softc_decrypt, sc);
|
||||
taskqgroup_attach_cpu(qgroup_if_io_tqg, &sc->sc_decrypt[i], sc, i, dev, NULL, "wg decrypt");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
crypto_taskq_destroy(struct wg_softc *sc)
|
||||
{
|
||||
for (int i = 0; i < mp_ncpus; i++) {
|
||||
taskqgroup_detach(qgroup_if_io_tqg, &sc->sc_encrypt[i]);
|
||||
taskqgroup_detach(qgroup_if_io_tqg, &sc->sc_decrypt[i]);
|
||||
}
|
||||
free(sc->sc_encrypt, M_WG);
|
||||
free(sc->sc_decrypt, M_WG);
|
||||
}
|
||||
|
||||
static int
|
||||
wg_cloneattach(if_ctx_t ctx, struct if_clone *ifc, const char *name, caddr_t params)
|
||||
{
|
||||
struct wg_softc *sc = iflib_get_softc(ctx);
|
||||
if_softc_ctx_t scctx;
|
||||
device_t dev;
|
||||
struct iovec iov;
|
||||
nvlist_t *nvl;
|
||||
void *packed;
|
||||
struct noise_local *local;
|
||||
uint8_t public[WG_KEY_SIZE];
|
||||
struct noise_upcall noise_upcall;
|
||||
int err;
|
||||
uint16_t listen_port;
|
||||
const void *key;
|
||||
size_t size;
|
||||
|
||||
err = 0;
|
||||
dev = iflib_get_dev(ctx);
|
||||
if (params == NULL) {
|
||||
key = NULL;
|
||||
listen_port = 0;
|
||||
nvl = NULL;
|
||||
packed = NULL;
|
||||
goto unpacked;
|
||||
}
|
||||
if (copyin(params, &iov, sizeof(iov)))
|
||||
return (EFAULT);
|
||||
/* check that this is reasonable */
|
||||
size = iov.iov_len;
|
||||
packed = malloc(size, M_TEMP, M_WAITOK);
|
||||
if (copyin(iov.iov_base, packed, size)) {
|
||||
err = EFAULT;
|
||||
goto out;
|
||||
}
|
||||
nvl = nvlist_unpack(packed, size, 0);
|
||||
if (nvl == NULL) {
|
||||
device_printf(dev, "%s nvlist_unpack failed\n", __func__);
|
||||
err = EBADMSG;
|
||||
goto out;
|
||||
}
|
||||
if (!nvlist_exists_number(nvl, "listen-port")) {
|
||||
device_printf(dev, "%s listen-port not set\n", __func__);
|
||||
err = EBADMSG;
|
||||
goto nvl_out;
|
||||
}
|
||||
listen_port = nvlist_get_number(nvl, "listen-port");
|
||||
|
||||
if (!nvlist_exists_binary(nvl, "private-key")) {
|
||||
device_printf(dev, "%s private-key not set\n", __func__);
|
||||
err = EBADMSG;
|
||||
goto nvl_out;
|
||||
}
|
||||
key = nvlist_get_binary(nvl, "private-key", &size);
|
||||
if (size != CURVE25519_KEY_SIZE) {
|
||||
device_printf(dev, "%s bad length for private-key %zu\n", __func__, size);
|
||||
err = EBADMSG;
|
||||
goto nvl_out;
|
||||
}
|
||||
unpacked:
|
||||
local = &sc->sc_local;
|
||||
noise_upcall.u_arg = sc;
|
||||
noise_upcall.u_remote_get =
|
||||
(struct noise_remote *(*)(void *, uint8_t *))wg_remote_get;
|
||||
noise_upcall.u_index_set =
|
||||
(uint32_t (*)(void *, struct noise_remote *))wg_index_set;
|
||||
noise_upcall.u_index_drop =
|
||||
(void (*)(void *, uint32_t))wg_index_drop;
|
||||
noise_local_init(local, &noise_upcall);
|
||||
cookie_checker_init(&sc->sc_cookie, ratelimit_zone);
|
||||
|
||||
sc->sc_socket.so_port = listen_port;
|
||||
|
||||
if (key != NULL) {
|
||||
noise_local_set_private(local, __DECONST(uint8_t *, key));
|
||||
noise_local_keys(local, public, NULL);
|
||||
cookie_checker_update(&sc->sc_cookie, public);
|
||||
}
|
||||
atomic_add_int(&clone_count, 1);
|
||||
scctx = sc->shared = iflib_get_softc_ctx(ctx);
|
||||
scctx->isc_capenable = WG_CAPS;
|
||||
scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_TSO | CSUM_IP6_TCP \
|
||||
| CSUM_IP6_UDP | CSUM_IP6_TCP;
|
||||
sc->wg_ctx = ctx;
|
||||
sc->sc_ifp = iflib_get_ifp(ctx);
|
||||
|
||||
mbufq_init(&sc->sc_handshake_queue, MAX_QUEUED_INCOMING_HANDSHAKES);
|
||||
mtx_init(&sc->sc_mtx, NULL, "wg softc lock", MTX_DEF);
|
||||
rw_init(&sc->sc_index_lock, "wg index lock");
|
||||
sc->sc_encap_ring = buf_ring_alloc(MAX_QUEUED_PACKETS, M_WG, M_WAITOK, &sc->sc_mtx);
|
||||
sc->sc_decap_ring = buf_ring_alloc(MAX_QUEUED_PACKETS, M_WG, M_WAITOK, &sc->sc_mtx);
|
||||
GROUPTASK_INIT(&sc->sc_handshake, 0,
|
||||
(gtask_fn_t *)wg_softc_handshake_receive, sc);
|
||||
taskqgroup_attach(qgroup_if_io_tqg, &sc->sc_handshake, sc, dev, NULL, "wg tx initiation");
|
||||
crypto_taskq_setup(sc);
|
||||
nvl_out:
|
||||
if (nvl != NULL)
|
||||
nvlist_destroy(nvl);
|
||||
out:
|
||||
free(packed, M_TEMP);
|
||||
return (err);
|
||||
}
|
||||
|
||||
static int
|
||||
wg_transmit(struct ifnet *ifp, struct mbuf *m)
|
||||
{
|
||||
struct wg_softc *sc;
|
||||
sa_family_t family;
|
||||
struct epoch_tracker et;
|
||||
struct wg_peer *peer;
|
||||
struct wg_tag *t;
|
||||
uint32_t af;
|
||||
int rc;
|
||||
|
||||
|
||||
/*
|
||||
* Work around lifetime issue in the ipv6 mld code.
|
||||
*/
|
||||
if (__predict_false(ifp->if_flags & IFF_DYING))
|
||||
return (ENXIO);
|
||||
|
||||
rc = 0;
|
||||
sc = iflib_get_softc(ifp->if_softc);
|
||||
if ((t = wg_tag_get(m)) == NULL) {
|
||||
rc = ENOBUFS;
|
||||
goto early_out;
|
||||
}
|
||||
af = m->m_pkthdr.ph_family;
|
||||
BPF_MTAP2(ifp, &af, sizeof(af), m);
|
||||
|
||||
NET_EPOCH_ENTER(et);
|
||||
peer = wg_route_lookup(&sc->sc_routes, m, OUT);
|
||||
if (__predict_false(peer == NULL)) {
|
||||
rc = ENOKEY;
|
||||
printf("peer not found - dropping %p\n", m);
|
||||
/* XXX log */
|
||||
goto err;
|
||||
}
|
||||
|
||||
family = atomic_load_acq(peer->p_endpoint.e_remote.r_sa.sa_family);
|
||||
if (__predict_false(family != AF_INET && family != AF_INET6)) {
|
||||
rc = EHOSTUNREACH;
|
||||
/* XXX log */
|
||||
goto err;
|
||||
}
|
||||
t->t_peer = peer;
|
||||
t->t_mbuf = NULL;
|
||||
t->t_done = 0;
|
||||
t->t_mtu = ifp->if_mtu;
|
||||
|
||||
rc = wg_queue_out(peer, m);
|
||||
if (rc == 0)
|
||||
wg_encrypt_dispatch(peer->p_sc);
|
||||
NET_EPOCH_EXIT(et);
|
||||
return (rc);
|
||||
err:
|
||||
NET_EPOCH_EXIT(et);
|
||||
early_out:
|
||||
if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
|
||||
/* XXX send ICMP unreachable */
|
||||
m_free(m);
|
||||
return (rc);
|
||||
}
|
||||
|
||||
static int
|
||||
wg_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa, struct route *rt)
|
||||
{
|
||||
m->m_pkthdr.ph_family = sa->sa_family;
|
||||
return (wg_transmit(ifp, m));
|
||||
}
|
||||
|
||||
static int
|
||||
wg_attach_post(if_ctx_t ctx)
|
||||
{
|
||||
struct ifnet *ifp;
|
||||
struct wg_softc *sc;
|
||||
|
||||
sc = iflib_get_softc(ctx);
|
||||
ifp = iflib_get_ifp(ctx);
|
||||
if_setmtu(ifp, ETHERMTU - 80);
|
||||
|
||||
if_setflagbits(ifp, IFF_NOARP, IFF_POINTOPOINT);
|
||||
ifp->if_transmit = wg_transmit;
|
||||
ifp->if_output = wg_output;
|
||||
|
||||
wg_hashtable_init(&sc->sc_hashtable);
|
||||
sc->sc_index = hashinit(HASHTABLE_INDEX_SIZE, M_DEVBUF, &sc->sc_index_mask);
|
||||
wg_route_init(&sc->sc_routes);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
 * iflib mtu_set hook.  Stub: performs no validation or driver-side work and
 * always reports success.
 */
static int
|
||||
wg_mtu_set(if_ctx_t ctx, uint32_t mtu)
|
||||
{
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
 * iflib promisc_set hook.  Stub: ignores the requested flags and always
 * reports success.
 */
static int
|
||||
wg_set_promisc(if_ctx_t ctx, int flags)
|
||||
{
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
wg_detach(if_ctx_t ctx)
|
||||
{
|
||||
struct wg_softc *sc;
|
||||
|
||||
sc = iflib_get_softc(ctx);
|
||||
if_link_state_change(sc->sc_ifp, LINK_STATE_DOWN);
|
||||
NET_EPOCH_WAIT();
|
||||
wg_socket_reinit(sc, NULL, NULL);
|
||||
taskqgroup_drain_all(qgroup_if_io_tqg);
|
||||
pause("link_down", hz/4);
|
||||
wg_peer_remove_all(sc);
|
||||
pause("link_down", hz);
|
||||
mtx_destroy(&sc->sc_mtx);
|
||||
rw_destroy(&sc->sc_index_lock);
|
||||
taskqgroup_detach(qgroup_if_io_tqg, &sc->sc_handshake);
|
||||
crypto_taskq_destroy(sc);
|
||||
buf_ring_free(sc->sc_encap_ring, M_WG);
|
||||
buf_ring_free(sc->sc_decap_ring, M_WG);
|
||||
|
||||
wg_route_destroy(&sc->sc_routes);
|
||||
wg_hashtable_destroy(&sc->sc_hashtable);
|
||||
atomic_add_int(&clone_count, -1);
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
wg_init(if_ctx_t ctx)
|
||||
{
|
||||
struct ifnet *ifp;
|
||||
struct wg_softc *sc;
|
||||
int rc;
|
||||
|
||||
sc = iflib_get_softc(ctx);
|
||||
ifp = iflib_get_ifp(ctx);
|
||||
rc = wg_socket_init(sc);
|
||||
if (rc)
|
||||
return;
|
||||
if_link_state_change(ifp, LINK_STATE_UP);
|
||||
}
|
||||
|
||||
static void
|
||||
wg_stop(if_ctx_t ctx)
|
||||
{
|
||||
struct wg_softc *sc;
|
||||
struct ifnet *ifp;
|
||||
|
||||
sc = iflib_get_softc(ctx);
|
||||
ifp = iflib_get_ifp(ctx);
|
||||
if_link_state_change(ifp, LINK_STATE_DOWN);
|
||||
}
|
||||
|
||||
static nvlist_t *
|
||||
wg_peer_to_nvl(struct wg_peer *peer)
|
||||
{
|
||||
struct wg_route *rt;
|
||||
int i, count;
|
||||
nvlist_t *nvl;
|
||||
caddr_t key;
|
||||
struct wg_allowedip *aip;
|
||||
|
||||
if ((nvl = nvlist_create(0)) == NULL)
|
||||
return (NULL);
|
||||
key = peer->p_remote.r_public;
|
||||
nvlist_add_binary(nvl, "public-key", key, WG_KEY_SIZE);
|
||||
nvlist_add_binary(nvl, "endpoint", &peer->p_endpoint.e_remote, sizeof(struct sockaddr));
|
||||
i = count = 0;
|
||||
CK_LIST_FOREACH(rt, &peer->p_routes, r_entry) {
|
||||
count++;
|
||||
}
|
||||
aip = malloc(count*sizeof(*aip), M_TEMP, M_WAITOK);
|
||||
CK_LIST_FOREACH(rt, &peer->p_routes, r_entry) {
|
||||
memcpy(&aip[i++], &rt->r_cidr, sizeof(*aip));
|
||||
}
|
||||
nvlist_add_binary(nvl, "allowed-ips", aip, count*sizeof(*aip));
|
||||
free(aip, M_TEMP);
|
||||
return (nvl);
|
||||
}
|
||||
|
||||
static int
|
||||
wg_marshal_peers(struct wg_softc *sc, nvlist_t **nvlp, nvlist_t ***nvl_arrayp, int *peer_countp)
|
||||
{
|
||||
struct wg_peer *peer;
|
||||
int err, i, peer_count;
|
||||
nvlist_t *nvl, **nvl_array;
|
||||
struct epoch_tracker et;
|
||||
#ifdef INVARIANTS
|
||||
void *packed;
|
||||
size_t size;
|
||||
#endif
|
||||
nvl = NULL;
|
||||
nvl_array = NULL;
|
||||
if (nvl_arrayp)
|
||||
*nvl_arrayp = NULL;
|
||||
if (nvlp)
|
||||
*nvlp = NULL;
|
||||
if (peer_countp)
|
||||
*peer_countp = 0;
|
||||
peer_count = sc->sc_hashtable.h_num_peers;
|
||||
if (peer_count == 0) {
|
||||
printf("no peers found\n");
|
||||
return (ENOENT);
|
||||
}
|
||||
|
||||
if (nvlp && (nvl = nvlist_create(0)) == NULL)
|
||||
return (ENOMEM);
|
||||
err = i = 0;
|
||||
nvl_array = malloc(peer_count*sizeof(void*), M_TEMP, M_WAITOK);
|
||||
NET_EPOCH_ENTER(et);
|
||||
CK_LIST_FOREACH(peer, &sc->sc_hashtable.h_peers_list, p_entry) {
|
||||
nvl_array[i] = wg_peer_to_nvl(peer);
|
||||
if (nvl_array[i] == NULL) {
|
||||
printf("wg_peer_to_nvl failed on %d peer\n", i);
|
||||
break;
|
||||
}
|
||||
#ifdef INVARIANTS
|
||||
packed = nvlist_pack(nvl_array[i], &size);
|
||||
if (packed == NULL) {
|
||||
printf("nvlist_pack(%p, %p) => %d",
|
||||
nvl_array[i], &size, nvlist_error(nvl));
|
||||
}
|
||||
free(packed, M_NVLIST);
|
||||
#endif
|
||||
i++;
|
||||
if (i == peer_count)
|
||||
break;
|
||||
}
|
||||
NET_EPOCH_EXIT(et);
|
||||
*peer_countp = peer_count = i;
|
||||
if (peer_count == 0) {
|
||||
printf("no peers found in list\n");
|
||||
err = ENOENT;
|
||||
goto out;
|
||||
}
|
||||
if (nvl) {
|
||||
nvlist_add_nvlist_array(nvl, "peer-list",
|
||||
(const nvlist_t * const *)nvl_array, peer_count);
|
||||
if ((err = nvlist_error(nvl))) {
|
||||
printf("nvlist_add_nvlist_array(%p, \"peer-list\", %p, %d) => %d\n",
|
||||
nvl, nvl_array, peer_count, err);
|
||||
goto out;
|
||||
}
|
||||
*nvlp = nvl;
|
||||
}
|
||||
*nvl_arrayp = nvl_array;
|
||||
return (0);
|
||||
out:
|
||||
return (err);
|
||||
}
|
||||
|
||||
static int
|
||||
wgc_get(struct wg_softc *sc, struct ifdrv *ifd)
|
||||
{
|
||||
nvlist_t *nvl, **nvl_array;
|
||||
void *packed;
|
||||
size_t size;
|
||||
int peer_count, err;
|
||||
|
||||
nvl = nvlist_create(0);
|
||||
if (nvl == NULL)
|
||||
return (ENOMEM);
|
||||
|
||||
err = 0;
|
||||
packed = NULL;
|
||||
if (sc->sc_socket.so_port != 0)
|
||||
nvlist_add_number(nvl, "listen-port", sc->sc_socket.so_port);
|
||||
if (sc->sc_local.l_has_identity) {
|
||||
nvlist_add_binary(nvl, "public-key", sc->sc_local.l_public, WG_KEY_SIZE);
|
||||
if (curthread->td_ucred->cr_uid == 0)
|
||||
nvlist_add_binary(nvl, "private-key", sc->sc_local.l_private, WG_KEY_SIZE);
|
||||
}
|
||||
if (sc->sc_hashtable.h_num_peers > 0) {
|
||||
err = wg_marshal_peers(sc, NULL, &nvl_array, &peer_count);
|
||||
if (err)
|
||||
goto out;
|
||||
nvlist_add_nvlist_array(nvl, "peer-list",
|
||||
(const nvlist_t * const *)nvl_array, peer_count);
|
||||
}
|
||||
packed = nvlist_pack(nvl, &size);
|
||||
if (packed == NULL)
|
||||
return (ENOMEM);
|
||||
if (ifd->ifd_len == 0) {
|
||||
ifd->ifd_len = size;
|
||||
goto out;
|
||||
}
|
||||
if (ifd->ifd_len < size) {
|
||||
err = ENOSPC;
|
||||
goto out;
|
||||
}
|
||||
if (ifd->ifd_data == NULL) {
|
||||
err = EFAULT;
|
||||
goto out;
|
||||
}
|
||||
err = copyout(packed, ifd->ifd_data, size);
|
||||
ifd->ifd_len = size;
|
||||
out:
|
||||
nvlist_destroy(nvl);
|
||||
free(packed, M_NVLIST);
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
 * Validate one allowed-IP entry supplied by user space.
 * Placeholder: no checks are implemented yet, so every entry is accepted.
 */
static bool
|
||||
wg_allowedip_valid(const struct wg_allowedip *wip)
|
||||
{
|
||||
|
||||
return (true);
|
||||
}
|
||||
|
||||
static int
|
||||
wg_peer_add(struct wg_softc *sc, const nvlist_t *nvl)
|
||||
{
|
||||
uint8_t public[WG_KEY_SIZE];
|
||||
const void *pub_key;
|
||||
const struct sockaddr *endpoint;
|
||||
int i, err, allowedip_count;
|
||||
device_t dev;
|
||||
size_t size;
|
||||
struct wg_peer *peer = NULL;
|
||||
bool need_insert = false;
|
||||
dev = iflib_get_dev(sc->wg_ctx);
|
||||
|
||||
if (!nvlist_exists_binary(nvl, "public-key")) {
|
||||
device_printf(dev, "peer has no public-key\n");
|
||||
return (EINVAL);
|
||||
}
|
||||
pub_key = nvlist_get_binary(nvl, "public-key", &size);
|
||||
if (size != CURVE25519_KEY_SIZE) {
|
||||
device_printf(dev, "%s bad length for public-key %zu\n", __func__, size);
|
||||
return (EINVAL);
|
||||
}
|
||||
if (noise_local_keys(&sc->sc_local, public, NULL) == 0 &&
|
||||
bcmp(public, pub_key, WG_KEY_SIZE) == 0) {
|
||||
device_printf(dev, "public-key for peer already in use by host\n");
|
||||
return (EINVAL);
|
||||
}
|
||||
peer = wg_peer_lookup(sc, pub_key);
|
||||
if (nvlist_exists_bool(nvl, "peer-remove") &&
|
||||
nvlist_get_bool(nvl, "peer-remove")) {
|
||||
if (peer != NULL) {
|
||||
wg_hashtable_peer_remove(&sc->sc_hashtable, peer);
|
||||
wg_peer_destroy(peer);
|
||||
/* XXX free */
|
||||
printf("peer removed\n");
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
if (nvlist_exists_bool(nvl, "replace-allowedips") &&
|
||||
nvlist_get_bool(nvl, "replace-allowedips") &&
|
||||
peer != NULL) {
|
||||
|
||||
wg_route_delete(&peer->p_sc->sc_routes, peer);
|
||||
}
|
||||
if (peer == NULL) {
|
||||
need_insert = true;
|
||||
peer = wg_peer_alloc(sc);
|
||||
noise_remote_init(&peer->p_remote, pub_key, &sc->sc_local);
|
||||
cookie_maker_init(&peer->p_cookie, pub_key);
|
||||
}
|
||||
if (nvlist_exists_binary(nvl, "endpoint")) {
|
||||
endpoint = nvlist_get_binary(nvl, "endpoint", &size);
|
||||
if (size != sizeof(*endpoint)) {
|
||||
device_printf(dev, "%s bad length for endpoint %zu\n", __func__, size);
|
||||
err = EBADMSG;
|
||||
goto out;
|
||||
}
|
||||
memcpy(&peer->p_endpoint.e_remote, endpoint,
|
||||
sizeof(peer->p_endpoint.e_remote));
|
||||
}
|
||||
if (nvlist_exists_binary(nvl, "pre-shared-key")) {
|
||||
const void *key;
|
||||
|
||||
key = nvlist_get_binary(nvl, "pre-shared-key", &size);
|
||||
noise_remote_set_psk(&peer->p_remote, key);
|
||||
}
|
||||
if (nvlist_exists_number(nvl, "persistent-keepalive-interval")) {
|
||||
uint16_t pki;
|
||||
|
||||
pki = nvlist_get_number(nvl, "persistent-keepalive-interval");
|
||||
wg_timers_set_persistent_keepalive(&peer->p_timers, pki);
|
||||
}
|
||||
if (nvlist_exists_binary(nvl, "allowed-ips")) {
|
||||
const struct wg_allowedip *aip, *aip_base;
|
||||
|
||||
aip = aip_base = nvlist_get_binary(nvl, "allowed-ips", &size);
|
||||
if (size % sizeof(struct wg_allowedip) != 0) {
|
||||
device_printf(dev, "%s bad length for allowed-ips %zu not integer multiple of struct size\n", __func__, size);
|
||||
err = EBADMSG;
|
||||
goto out;
|
||||
}
|
||||
allowedip_count = size/sizeof(struct wg_allowedip);
|
||||
for (i = 0; i < allowedip_count; i++) {
|
||||
if (!wg_allowedip_valid(&aip_base[i])) {
|
||||
device_printf(dev, "%s allowedip %d not valid\n", __func__, i);
|
||||
err = EBADMSG;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < allowedip_count; i++, aip++) {
|
||||
if ((err = wg_route_add(&sc->sc_routes, peer, aip)) != 0) {
|
||||
printf("route add %d failed -> %d\n", i, err);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (need_insert)
|
||||
wg_hashtable_peer_insert(&sc->sc_hashtable, peer);
|
||||
return (0);
|
||||
|
||||
out:
|
||||
wg_peer_destroy(peer);
|
||||
return (err);
|
||||
}
|
||||
|
||||
static int
|
||||
wgc_set(struct wg_softc *sc, struct ifdrv *ifd)
|
||||
{
|
||||
uint8_t public[WG_KEY_SIZE];
|
||||
void *nvlpacked;
|
||||
nvlist_t *nvl;
|
||||
device_t dev;
|
||||
ssize_t size;
|
||||
int err;
|
||||
|
||||
if (ifd->ifd_len == 0 || ifd->ifd_data == NULL)
|
||||
return (EFAULT);
|
||||
|
||||
dev = iflib_get_dev(sc->wg_ctx);
|
||||
nvlpacked = malloc(ifd->ifd_len, M_TEMP, M_WAITOK);
|
||||
err = copyin(ifd->ifd_data, nvlpacked, ifd->ifd_len);
|
||||
if (err)
|
||||
goto out;
|
||||
nvl = nvlist_unpack(nvlpacked, ifd->ifd_len, 0);
|
||||
if (nvl == NULL) {
|
||||
device_printf(dev, "%s nvlist_unpack failed\n", __func__);
|
||||
err = EBADMSG;
|
||||
goto out;
|
||||
}
|
||||
if (nvlist_exists_bool(nvl, "replace-peers") &&
|
||||
nvlist_get_bool(nvl, "replace-peers"))
|
||||
wg_peer_remove_all(sc);
|
||||
if (nvlist_exists_number(nvl, "listen-port")) {
|
||||
int listen_port __unused = nvlist_get_number(nvl, "listen-port");
|
||||
/*
|
||||
* Set listen port
|
||||
*/
|
||||
if_link_state_change(sc->sc_ifp, LINK_STATE_DOWN);
|
||||
pause("link_down", hz/4);
|
||||
wg_socket_reinit(sc, NULL, NULL);
|
||||
sc->sc_socket.so_port = listen_port;
|
||||
if ((err = wg_socket_init(sc)) != 0)
|
||||
goto out;
|
||||
if_link_state_change(sc->sc_ifp, LINK_STATE_UP);
|
||||
}
|
||||
if (nvlist_exists_binary(nvl, "private-key")) {
|
||||
struct noise_local *local;
|
||||
const void *key = nvlist_get_binary(nvl, "private-key", &size);
|
||||
|
||||
if (size != CURVE25519_KEY_SIZE) {
|
||||
device_printf(dev, "%s bad length for private-key %zu\n", __func__, size);
|
||||
err = EBADMSG;
|
||||
goto nvl_out;
|
||||
}
|
||||
/*
|
||||
* set private key
|
||||
*/
|
||||
local = &sc->sc_local;
|
||||
noise_local_set_private(local, __DECONST(uint8_t *, key));
|
||||
noise_local_keys(local, public, NULL);
|
||||
cookie_checker_update(&sc->sc_cookie, public);
|
||||
}
|
||||
if (nvlist_exists_number(nvl, "user-cookie")) {
|
||||
sc->sc_user_cookie = nvlist_get_number(nvl, "user-cookie");
|
||||
/*
|
||||
* setsockopt
|
||||
*/
|
||||
}
|
||||
if (nvlist_exists_nvlist_array(nvl, "peer-list")) {
|
||||
size_t peercount;
|
||||
const nvlist_t * const*nvl_peers;
|
||||
|
||||
nvl_peers = nvlist_get_nvlist_array(nvl, "peer-list", &peercount);
|
||||
for (int i = 0; i < peercount; i++) {
|
||||
wg_peer_add(sc, nvl_peers[i]);
|
||||
}
|
||||
}
|
||||
nvl_out:
|
||||
nvlist_destroy(nvl);
|
||||
out:
|
||||
free(nvlpacked, M_TEMP);
|
||||
return (err);
|
||||
}
|
||||
|
||||
static int
|
||||
wg_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data)
|
||||
{
|
||||
struct wg_softc *sc = iflib_get_softc(ctx);
|
||||
struct ifdrv *ifd = (struct ifdrv *)data;
|
||||
int ifd_cmd;
|
||||
|
||||
switch (command) {
|
||||
case SIOCGDRVSPEC:
|
||||
case SIOCSDRVSPEC:
|
||||
ifd_cmd = ifd->ifd_cmd;
|
||||
break;
|
||||
default:
|
||||
return (EINVAL);
|
||||
}
|
||||
switch (ifd_cmd) {
|
||||
case WGC_GET:
|
||||
return (wgc_get(sc, ifd));
|
||||
break;
|
||||
case WGC_SET:
|
||||
if (priv_check(curthread, PRIV_NET_HWIOCTL))
|
||||
return (EPERM);
|
||||
return (wgc_set(sc, ifd));
|
||||
break;
|
||||
}
|
||||
return (ENOTSUP);
|
||||
}
|
||||
|
||||
static device_method_t wg_if_methods[] = {
|
||||
DEVMETHOD(ifdi_cloneattach, wg_cloneattach),
|
||||
DEVMETHOD(ifdi_attach_post, wg_attach_post),
|
||||
DEVMETHOD(ifdi_detach, wg_detach),
|
||||
DEVMETHOD(ifdi_init, wg_init),
|
||||
DEVMETHOD(ifdi_stop, wg_stop),
|
||||
DEVMETHOD(ifdi_priv_ioctl, wg_priv_ioctl),
|
||||
DEVMETHOD(ifdi_mtu_set, wg_mtu_set),
|
||||
DEVMETHOD(ifdi_promisc_set, wg_set_promisc),
|
||||
DEVMETHOD_END
|
||||
};
|
||||
|
||||
static driver_t wg_iflib_driver = {
|
||||
"wg", wg_if_methods, sizeof(struct wg_softc)
|
||||
};
|
||||
|
||||
char wg_driver_version[] = "0.0.1";
|
||||
|
||||
static struct if_shared_ctx wg_sctx_init = {
|
||||
.isc_magic = IFLIB_MAGIC,
|
||||
.isc_driver_version = wg_driver_version,
|
||||
.isc_driver = &wg_iflib_driver,
|
||||
.isc_flags = IFLIB_PSEUDO,
|
||||
.isc_name = "wg",
|
||||
};
|
||||
|
||||
if_shared_ctx_t wg_sctx = &wg_sctx_init;
|
||||
static if_pseudo_t wg_pseudo;
|
||||
|
||||
|
||||
int
|
||||
wg_ctx_init(void)
|
||||
{
|
||||
ratelimit_zone = uma_zcreate("wg ratelimit", sizeof(struct ratelimit),
|
||||
NULL, NULL, NULL, NULL, 0, 0);
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
wg_ctx_uninit(void)
|
||||
{
|
||||
uma_zdestroy(ratelimit_zone);
|
||||
}
|
||||
|
||||
static int
|
||||
wg_module_init(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if ((rc = wg_ctx_init()))
|
||||
return (rc);
|
||||
|
||||
wg_pseudo = iflib_clone_register(wg_sctx);
|
||||
if (wg_pseudo == NULL)
|
||||
return (ENXIO);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
wg_module_deinit(void)
|
||||
{
|
||||
wg_ctx_uninit();
|
||||
iflib_clone_deregister(wg_pseudo);
|
||||
}
|
||||
|
||||
static int
|
||||
wg_module_event_handler(module_t mod, int what, void *arg)
|
||||
{
|
||||
int err;
|
||||
|
||||
switch (what) {
|
||||
case MOD_LOAD:
|
||||
if ((err = wg_module_init()) != 0)
|
||||
return (err);
|
||||
break;
|
||||
case MOD_UNLOAD:
|
||||
if (clone_count == 0)
|
||||
wg_module_deinit();
|
||||
else
|
||||
return (EBUSY);
|
||||
break;
|
||||
default:
|
||||
return (EOPNOTSUPP);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
static moduledata_t wg_moduledata = {
|
||||
"wg",
|
||||
wg_module_event_handler,
|
||||
NULL
|
||||
};
|
||||
|
||||
DECLARE_MODULE(wg, wg_moduledata, SI_SUB_PSEUDO, SI_ORDER_ANY);
|
||||
MODULE_VERSION(wg, 1);
|
||||
MODULE_DEPEND(wg, iflib, 1, 1, 1);
|
||||
MODULE_DEPEND(wg, blake2, 1, 1, 1);
|
||||
MODULE_DEPEND(wg, crypto, 1, 1, 1);
|
3021
sys/dev/if_wg/module/poly1305-x86_64.S
Normal file
3021
sys/dev/if_wg/module/poly1305-x86_64.S
Normal file
File diff suppressed because it is too large
Load Diff
399
sys/dev/if_wg/module/wg_cookie.c
Normal file
399
sys/dev/if_wg/module/wg_cookie.c
Normal file
@ -0,0 +1,399 @@
|
||||
/*
|
||||
* Copyright (C) 2015-2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
* Copyright (C) 2019-2020 Matt Dunwoodie <ncon@noconroy.net>
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/rwlock.h>
|
||||
#include <sys/malloc.h> /* Because systm doesn't include M_NOWAIT, M_DEVBUF */
|
||||
#include <sys/socket.h>
|
||||
|
||||
#include <sys/wg_cookie.h>
|
||||
#include <zinc/chacha20poly1305.h>
|
||||
|
||||
static void cookie_precompute_key(uint8_t *,
|
||||
const uint8_t[COOKIE_INPUT_SIZE], const char *);
|
||||
static void cookie_macs_mac1(struct cookie_macs *, const void *, size_t,
|
||||
const uint8_t[COOKIE_KEY_SIZE]);
|
||||
static void cookie_macs_mac2(struct cookie_macs *, const void *, size_t,
|
||||
const uint8_t[COOKIE_COOKIE_SIZE]);
|
||||
static int cookie_timer_expired(struct timespec *, time_t, long);
|
||||
static void cookie_checker_make_cookie(struct cookie_checker *,
|
||||
uint8_t[COOKIE_COOKIE_SIZE], struct sockaddr *);
|
||||
static void ratelimit_gc(struct ratelimit *, int);
|
||||
static int ratelimit_allow(struct ratelimit *, struct sockaddr *);
|
||||
|
||||
/* Public Functions */
|
||||
void
|
||||
cookie_maker_init(struct cookie_maker *cp, const uint8_t key[COOKIE_INPUT_SIZE])
|
||||
{
|
||||
bzero(cp, sizeof(*cp));
|
||||
cookie_precompute_key(cp->cp_mac1_key, key, COOKIE_MAC1_KEY_LABEL);
|
||||
cookie_precompute_key(cp->cp_cookie_key, key, COOKIE_COOKIE_KEY_LABEL);
|
||||
rw_init(&cp->cp_lock, "cookie_maker");
|
||||
}
|
||||
|
||||
int
|
||||
cookie_checker_init(struct cookie_checker *cc, uma_zone_t zone)
|
||||
{
|
||||
struct ratelimit *rl = &cc->cc_ratelimit;
|
||||
bzero(cc, sizeof(*cc));
|
||||
|
||||
rw_init(&cc->cc_key_lock, "cookie_checker_key");
|
||||
rw_init(&cc->cc_secret_lock, "cookie_checker_secret");
|
||||
|
||||
rw_init(&rl->rl_lock, "ratelimit_lock");
|
||||
arc4random_buf(&rl->rl_secret, sizeof(rl->rl_secret));
|
||||
rl->rl_table = hashinit(RATELIMIT_SIZE, M_DEVBUF, &rl->rl_table_mask);
|
||||
rl->rl_zone = zone;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
cookie_checker_update(struct cookie_checker *cc,
|
||||
uint8_t key[COOKIE_INPUT_SIZE])
|
||||
{
|
||||
rw_enter_write(&cc->cc_key_lock);
|
||||
if (key) {
|
||||
cookie_precompute_key(cc->cc_mac1_key, key, COOKIE_MAC1_KEY_LABEL);
|
||||
cookie_precompute_key(cc->cc_cookie_key, key, COOKIE_COOKIE_KEY_LABEL);
|
||||
} else {
|
||||
bzero(cc->cc_mac1_key, sizeof(cc->cc_mac1_key));
|
||||
bzero(cc->cc_cookie_key, sizeof(cc->cc_cookie_key));
|
||||
}
|
||||
rw_exit_write(&cc->cc_key_lock);
|
||||
}
|
||||
|
||||
void
|
||||
cookie_checker_deinit(struct cookie_checker *cc)
|
||||
{
|
||||
struct ratelimit *rl = &cc->cc_ratelimit;
|
||||
|
||||
rw_enter_write(&rl->rl_lock);
|
||||
ratelimit_gc(rl, 1);
|
||||
hashdestroy(rl->rl_table, M_DEVBUF, rl->rl_table_mask);
|
||||
rw_exit_write(&rl->rl_lock);
|
||||
}
|
||||
|
||||
void
|
||||
cookie_checker_create_payload(struct cookie_checker *cc,
|
||||
struct cookie_macs *cm, uint8_t nonce[COOKIE_XNONCE_SIZE],
|
||||
uint8_t ecookie[COOKIE_ENCRYPTED_SIZE], struct sockaddr *sa)
|
||||
{
|
||||
uint8_t cookie[COOKIE_COOKIE_SIZE];
|
||||
|
||||
cookie_checker_make_cookie(cc, cookie, sa);
|
||||
arc4random_buf(nonce, COOKIE_XNONCE_SIZE);
|
||||
|
||||
rw_enter_read(&cc->cc_key_lock);
|
||||
xchacha20poly1305_encrypt(ecookie, cookie, COOKIE_COOKIE_SIZE,
|
||||
cm->mac1, COOKIE_MAC_SIZE, nonce, cc->cc_cookie_key);
|
||||
rw_exit_read(&cc->cc_key_lock);
|
||||
|
||||
explicit_bzero(cookie, sizeof(cookie));
|
||||
}
|
||||
|
||||
int
|
||||
cookie_maker_consume_payload(struct cookie_maker *cp,
|
||||
uint8_t nonce[COOKIE_XNONCE_SIZE], uint8_t ecookie[COOKIE_ENCRYPTED_SIZE])
|
||||
{
|
||||
int ret = 0;
|
||||
uint8_t cookie[COOKIE_COOKIE_SIZE];
|
||||
|
||||
rw_enter_write(&cp->cp_lock);
|
||||
|
||||
if (cp->cp_mac1_valid == 0) {
|
||||
ret = ETIMEDOUT;
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (xchacha20poly1305_decrypt(cookie, ecookie, COOKIE_ENCRYPTED_SIZE,
|
||||
cp->cp_mac1_last, COOKIE_MAC_SIZE, nonce, cp->cp_cookie_key) == 0) {
|
||||
ret = EINVAL;
|
||||
goto error;
|
||||
}
|
||||
|
||||
memcpy(cp->cp_cookie, cookie, COOKIE_COOKIE_SIZE);
|
||||
getnanouptime(&cp->cp_birthdate);
|
||||
cp->cp_mac1_valid = 0;
|
||||
|
||||
error:
|
||||
rw_exit_write(&cp->cp_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
cookie_maker_mac(struct cookie_maker *cp, struct cookie_macs *cm, void *buf,
|
||||
size_t len)
|
||||
{
|
||||
rw_enter_read(&cp->cp_lock);
|
||||
|
||||
cookie_macs_mac1(cm, buf, len, cp->cp_mac1_key);
|
||||
|
||||
memcpy(cp->cp_mac1_last, cm->mac1, COOKIE_MAC_SIZE);
|
||||
cp->cp_mac1_valid = 1;
|
||||
|
||||
if (!cookie_timer_expired(&cp->cp_birthdate,
|
||||
COOKIE_SECRET_MAX_AGE - COOKIE_SECRET_LATENCY, 0))
|
||||
cookie_macs_mac2(cm, buf, len, cp->cp_cookie);
|
||||
else
|
||||
bzero(cm->mac2, COOKIE_MAC_SIZE);
|
||||
|
||||
rw_exit_read(&cp->cp_lock);
|
||||
}
|
||||
|
||||
int
|
||||
cookie_checker_validate_macs(struct cookie_checker *cc, struct cookie_macs *cm,
|
||||
void *buf, size_t len, int busy, struct sockaddr *sa)
|
||||
{
|
||||
struct cookie_macs our_cm;
|
||||
uint8_t cookie[COOKIE_COOKIE_SIZE];
|
||||
|
||||
/* Validate incoming MACs */
|
||||
rw_enter_read(&cc->cc_key_lock);
|
||||
cookie_macs_mac1(&our_cm, buf, len, cc->cc_mac1_key);
|
||||
rw_exit_read(&cc->cc_key_lock);
|
||||
|
||||
/* If mac1 is invald, we want to drop the packet */
|
||||
if (timingsafe_bcmp(our_cm.mac1, cm->mac1, COOKIE_MAC_SIZE) != 0)
|
||||
return EINVAL;
|
||||
|
||||
if (busy != 0) {
|
||||
cookie_checker_make_cookie(cc, cookie, sa);
|
||||
cookie_macs_mac2(&our_cm, buf, len, cookie);
|
||||
|
||||
/* If the mac2 is invalid, we want to send a cookie response */
|
||||
if (timingsafe_bcmp(our_cm.mac2, cm->mac2, COOKIE_MAC_SIZE) != 0)
|
||||
return EAGAIN;
|
||||
|
||||
/* If the mac2 is valid, we may want rate limit the peer.
|
||||
* ratelimit_allow will return either 0 or ECONNREFUSED,
|
||||
* implying there is no ratelimiting, or we should ratelimit
|
||||
* (refuse) respectively. */
|
||||
return ratelimit_allow(&cc->cc_ratelimit, sa);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Private functions */
|
||||
static void
|
||||
cookie_precompute_key(uint8_t *key, const uint8_t input[COOKIE_INPUT_SIZE],
|
||||
const char *label)
|
||||
{
|
||||
struct blake2s_state blake;
|
||||
|
||||
blake2s_init(&blake, COOKIE_KEY_SIZE);
|
||||
blake2s_update(&blake, label, strlen(label));
|
||||
blake2s_update(&blake, input, COOKIE_INPUT_SIZE);
|
||||
blake2s_final(&blake, key, COOKIE_KEY_SIZE);
|
||||
}
|
||||
|
||||
static void
|
||||
cookie_macs_mac1(struct cookie_macs *cm, const void *buf, size_t len,
|
||||
const uint8_t key[COOKIE_KEY_SIZE])
|
||||
{
|
||||
struct blake2s_state state;
|
||||
blake2s_init_key(&state, COOKIE_MAC_SIZE, key, COOKIE_KEY_SIZE);
|
||||
blake2s_update(&state, buf, len);
|
||||
blake2s_final(&state, cm->mac1, COOKIE_MAC_SIZE);
|
||||
}
|
||||
|
||||
static void
|
||||
cookie_macs_mac2(struct cookie_macs *cm, const void *buf, size_t len,
|
||||
const uint8_t key[COOKIE_COOKIE_SIZE])
|
||||
{
|
||||
struct blake2s_state state;
|
||||
blake2s_init_key(&state, COOKIE_MAC_SIZE, key, COOKIE_COOKIE_SIZE);
|
||||
blake2s_update(&state, buf, len);
|
||||
blake2s_update(&state, cm->mac1, COOKIE_MAC_SIZE);
|
||||
blake2s_final(&state, cm->mac2, COOKIE_MAC_SIZE);
|
||||
}
|
||||
|
||||
static int
|
||||
cookie_timer_expired(struct timespec *birthdate, time_t sec, long nsec)
|
||||
{
|
||||
struct timespec uptime;
|
||||
struct timespec expire = { .tv_sec = sec, .tv_nsec = nsec };
|
||||
|
||||
if (birthdate->tv_sec == 0 && birthdate->tv_nsec == 0)
|
||||
return ETIMEDOUT;
|
||||
|
||||
getnanouptime(&uptime);
|
||||
timespecadd(birthdate, &expire, &expire);
|
||||
return timespeccmp(&uptime, &expire, >) ? ETIMEDOUT : 0;
|
||||
}
|
||||
|
||||
static void
|
||||
cookie_checker_make_cookie(struct cookie_checker *cc,
|
||||
uint8_t cookie[COOKIE_COOKIE_SIZE], struct sockaddr *sa)
|
||||
{
|
||||
struct blake2s_state state;
|
||||
|
||||
rw_enter_write(&cc->cc_secret_lock);
|
||||
if (cookie_timer_expired(&cc->cc_secret_birthdate,
|
||||
COOKIE_SECRET_MAX_AGE, 0)) {
|
||||
arc4random_buf(cc->cc_secret, COOKIE_SECRET_SIZE);
|
||||
getnanouptime(&cc->cc_secret_birthdate);
|
||||
}
|
||||
blake2s_init_key(&state, COOKIE_COOKIE_SIZE, cc->cc_secret,
|
||||
COOKIE_SECRET_SIZE);
|
||||
rw_exit_write(&cc->cc_secret_lock);
|
||||
|
||||
if (sa->sa_family == AF_INET) {
|
||||
blake2s_update(&state, (uint8_t *)&satosin(sa)->sin_addr,
|
||||
sizeof(struct in_addr));
|
||||
blake2s_update(&state, (uint8_t *)&satosin(sa)->sin_port,
|
||||
sizeof(in_port_t));
|
||||
blake2s_final(&state, cookie, COOKIE_COOKIE_SIZE);
|
||||
} else if (sa->sa_family == AF_INET6) {
|
||||
blake2s_update(&state, (uint8_t *)&satosin6(sa)->sin6_addr,
|
||||
sizeof(struct in6_addr));
|
||||
blake2s_update(&state, (uint8_t *)&satosin6(sa)->sin6_port,
|
||||
sizeof(in_port_t));
|
||||
blake2s_final(&state, cookie, COOKIE_COOKIE_SIZE);
|
||||
} else {
|
||||
arc4random_buf(cookie, COOKIE_COOKIE_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
ratelimit_gc(struct ratelimit *rl, int force)
|
||||
{
|
||||
size_t i;
|
||||
struct ratelimit_entry *r, *tr;
|
||||
struct timespec expiry;
|
||||
|
||||
rw_assert(&rl->rl_lock, RA_WLOCKED);
|
||||
|
||||
if (force) {
|
||||
for (i = 0; i < RATELIMIT_SIZE; i++) {
|
||||
LIST_FOREACH_SAFE(r, &rl->rl_table[i], r_entry, tr) {
|
||||
rl->rl_table_num--;
|
||||
LIST_REMOVE(r, r_entry);
|
||||
uma_zfree(rl->rl_zone, r);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if ((cookie_timer_expired(&rl->rl_last_gc, ELEMENT_TIMEOUT, 0) &&
|
||||
rl->rl_table_num > 0)) {
|
||||
getnanouptime(&rl->rl_last_gc);
|
||||
getnanouptime(&expiry);
|
||||
expiry.tv_sec -= ELEMENT_TIMEOUT;
|
||||
|
||||
for (i = 0; i < RATELIMIT_SIZE; i++) {
|
||||
LIST_FOREACH_SAFE(r, &rl->rl_table[i], r_entry, tr) {
|
||||
if (timespeccmp(&r->r_last_time, &expiry, <)) {
|
||||
rl->rl_table_num--;
|
||||
LIST_REMOVE(r, r_entry);
|
||||
uma_zfree(rl->rl_zone, r);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Token-bucket rate limiter keyed on the sender's (masked) address.
 *
 * Returns 0 if the request from `sa` is allowed, or ECONNREFUSED if it
 * should be refused: unsupported address family, bucket exhausted, table
 * full, or no memory for a new entry.
 */
static int
ratelimit_allow(struct ratelimit *rl, struct sockaddr *sa)
{
	struct ratelimit_entry *r;
	struct timespec prev, elapsed;
	uint64_t key, tokens;
	int ret;

	switch (sa->sa_family) {
	case AF_INET:
		key = siphash24(&rl->rl_secret, &satosin(sa)->sin_addr,
		    IPV4_MASK_SIZE);
		break;
	case AF_INET6:
		key = siphash24(&rl->rl_secret, &satosin6(sa)->sin6_addr,
		    IPV6_MASK_SIZE);
		break;
	default:
		return (ECONNREFUSED);
	}

	ret = ECONNREFUSED;
	rw_enter_write(&rl->rl_lock);

	/* Look for an existing entry; r is NULL if the loop runs off the end. */
	LIST_FOREACH(r, &rl->rl_table[key & rl->rl_table_mask], r_entry) {
		if (r->r_af != sa->sa_family)
			continue;
		if (r->r_af == AF_INET && bcmp(&r->r_in,
		    &satosin(sa)->sin_addr, IPV4_MASK_SIZE) != 0)
			continue;
		if (r->r_af == AF_INET6 && bcmp(&r->r_in6,
		    &satosin6(sa)->sin6_addr, IPV6_MASK_SIZE) != 0)
			continue;
		break;
	}

	if (r != NULL) {
		/*
		 * Standard token bucket: credit the time elapsed since the
		 * entry was last used, cap at TOKEN_MAX, then either charge
		 * INITIATION_COST and allow the request, or (when tokens
		 * <= INITIATION_COST) store the balance and refuse it.
		 */
		prev = r->r_last_time;
		getnanouptime(&r->r_last_time);
		timespecsub(&r->r_last_time, &prev, &elapsed);

		tokens = r->r_tokens + elapsed.tv_sec * NSEC_PER_SEC +
		    elapsed.tv_nsec;
		if (tokens > TOKEN_MAX)
			tokens = TOKEN_MAX;

		if (tokens > INITIATION_COST) {
			tokens -= INITIATION_COST;
			ret = 0;
		}
		r->r_tokens = tokens;
		goto out;
	}

	/* No entry for this endpoint yet: opportunistically sweep old ones. */
	ratelimit_gc(rl, 0);

	/* Hard limit on number of entries */
	if (rl->rl_table_num >= RATELIMIT_SIZE_MAX * 8)
		goto out;

	/* Refuse if out of memory */
	r = uma_zalloc(rl->rl_zone, M_NOWAIT);
	if (r == NULL)
		goto out;

	rl->rl_table_num++;

	/* Insert entry into the hashtable and ensure it's initialised */
	LIST_INSERT_HEAD(&rl->rl_table[key & rl->rl_table_mask], r, r_entry);
	r->r_af = sa->sa_family;
	if (r->r_af == AF_INET)
		memcpy(&r->r_in, &satosin(sa)->sin_addr, IPV4_MASK_SIZE);
	else if (r->r_af == AF_INET6)
		memcpy(&r->r_in6, &satosin6(sa)->sin6_addr, IPV6_MASK_SIZE);

	getnanouptime(&r->r_last_time);
	r->r_tokens = TOKEN_MAX - INITIATION_COST;
	ret = 0;
out:
	rw_exit_write(&rl->rl_lock);
	return (ret);
}
|
958
sys/dev/if_wg/module/wg_noise.c
Normal file
958
sys/dev/if_wg/module/wg_noise.c
Normal file
@ -0,0 +1,958 @@
|
||||
/*
|
||||
* Copyright (C) 2015-2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
* Copyright (C) 2019-2020 Matt Dunwoodie <ncon@noconroy.net>
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/param.h>
|
||||
|
||||
#include <sys/rwlock.h>
|
||||
|
||||
#include <sys/wg_noise.h>
|
||||
#include <crypto/blake2s.h>
|
||||
#include <crypto/curve25519.h>
|
||||
#include <zinc/chacha20poly1305.h>
|
||||
|
||||
/* Private functions */
|
||||
static struct noise_keypair *
|
||||
noise_remote_keypair_allocate(struct noise_remote *);
|
||||
static void
|
||||
noise_remote_keypair_free(struct noise_remote *,
|
||||
struct noise_keypair *);
|
||||
static uint32_t noise_remote_handshake_index_get(struct noise_remote *);
|
||||
static void noise_remote_handshake_index_drop(struct noise_remote *);
|
||||
|
||||
static uint64_t noise_counter_send(struct noise_counter *);
|
||||
static int noise_counter_recv(struct noise_counter *, uint64_t);
|
||||
|
||||
static void noise_kdf(uint8_t *, uint8_t *, uint8_t *, const uint8_t *,
|
||||
size_t, size_t, size_t, size_t,
|
||||
const uint8_t [NOISE_HASH_SIZE]);
|
||||
static int noise_mix_dh(
|
||||
uint8_t [NOISE_HASH_SIZE],
|
||||
uint8_t [NOISE_SYMMETRIC_SIZE],
|
||||
const uint8_t [NOISE_KEY_SIZE],
|
||||
const uint8_t [NOISE_KEY_SIZE]);
|
||||
static int noise_mix_ss(
|
||||
uint8_t ck[NOISE_HASH_SIZE],
|
||||
uint8_t key[NOISE_SYMMETRIC_SIZE],
|
||||
const uint8_t ss[NOISE_KEY_SIZE]);
|
||||
static void noise_mix_hash(
|
||||
uint8_t [NOISE_HASH_SIZE],
|
||||
const uint8_t *,
|
||||
size_t);
|
||||
static void noise_mix_psk(
|
||||
uint8_t [NOISE_HASH_SIZE],
|
||||
uint8_t [NOISE_HASH_SIZE],
|
||||
uint8_t [NOISE_SYMMETRIC_SIZE],
|
||||
const uint8_t [NOISE_KEY_SIZE]);
|
||||
static void noise_param_init(
|
||||
uint8_t [NOISE_HASH_SIZE],
|
||||
uint8_t [NOISE_HASH_SIZE],
|
||||
const uint8_t [NOISE_KEY_SIZE]);
|
||||
|
||||
static void noise_msg_encrypt(uint8_t *, const uint8_t *, size_t,
|
||||
uint8_t [NOISE_SYMMETRIC_SIZE],
|
||||
uint8_t [NOISE_HASH_SIZE]);
|
||||
static int noise_msg_decrypt(uint8_t *, const uint8_t *, size_t,
|
||||
uint8_t [NOISE_SYMMETRIC_SIZE],
|
||||
uint8_t [NOISE_HASH_SIZE]);
|
||||
static void noise_msg_ephemeral(
|
||||
uint8_t [NOISE_HASH_SIZE],
|
||||
uint8_t [NOISE_HASH_SIZE],
|
||||
const uint8_t src[NOISE_KEY_SIZE]);
|
||||
|
||||
static void noise_tai64n_now(uint8_t [NOISE_TIMESTAMP_SIZE]);
|
||||
static int noise_timer_expired(struct timespec *, time_t, long);
|
||||
|
||||
/* Set/Get noise parameters */
|
||||
void
|
||||
noise_local_init(struct noise_local *l, struct noise_upcall *upcall)
|
||||
{
|
||||
bzero(l, sizeof(*l));
|
||||
rw_init(&l->l_identity_lock, "noise_local_identity");
|
||||
l->l_upcall = *upcall;
|
||||
}
|
||||
|
||||
void
|
||||
noise_local_lock_identity(struct noise_local *l)
|
||||
{
|
||||
rw_enter_write(&l->l_identity_lock);
|
||||
}
|
||||
|
||||
void
|
||||
noise_local_unlock_identity(struct noise_local *l)
|
||||
{
|
||||
rw_exit_write(&l->l_identity_lock);
|
||||
}
|
||||
|
||||
int
|
||||
noise_local_set_private(struct noise_local *l, uint8_t private[NOISE_KEY_SIZE])
|
||||
{
|
||||
|
||||
memcpy(l->l_private, private, NOISE_KEY_SIZE);
|
||||
curve25519_clamp_secret(l->l_private);
|
||||
l->l_has_identity = curve25519_generate_public(l->l_public, private);
|
||||
|
||||
return l->l_has_identity ? 0 : ENXIO;
|
||||
}
|
||||
|
||||
int
|
||||
noise_local_keys(struct noise_local *l, uint8_t public[NOISE_KEY_SIZE],
|
||||
uint8_t private[NOISE_KEY_SIZE])
|
||||
{
|
||||
int ret = 0;
|
||||
rw_enter_read(&l->l_identity_lock);
|
||||
if (l->l_has_identity) {
|
||||
if (public != NULL)
|
||||
memcpy(public, l->l_public, NOISE_KEY_SIZE);
|
||||
if (private != NULL)
|
||||
memcpy(private, l->l_private, NOISE_KEY_SIZE);
|
||||
} else {
|
||||
ret = ENXIO;
|
||||
}
|
||||
rw_exit_read(&l->l_identity_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
noise_remote_init(struct noise_remote *r, const uint8_t public[NOISE_KEY_SIZE],
|
||||
struct noise_local *l)
|
||||
{
|
||||
bzero(r, sizeof(*r));
|
||||
memcpy(r->r_public, public, NOISE_KEY_SIZE);
|
||||
rw_init(&r->r_handshake_lock, "noise_handshake");
|
||||
rw_init(&r->r_keypair_lock, "noise_keypair");
|
||||
|
||||
SLIST_INSERT_HEAD(&r->r_unused_keypairs, &r->r_keypair[0], kp_entry);
|
||||
SLIST_INSERT_HEAD(&r->r_unused_keypairs, &r->r_keypair[1], kp_entry);
|
||||
SLIST_INSERT_HEAD(&r->r_unused_keypairs, &r->r_keypair[2], kp_entry);
|
||||
|
||||
ASSERT(l != NULL);
|
||||
r->r_local = l;
|
||||
|
||||
rw_enter_write(&l->l_identity_lock);
|
||||
noise_remote_precompute(r);
|
||||
rw_exit_write(&l->l_identity_lock);
|
||||
}
|
||||
|
||||
int
|
||||
noise_remote_set_psk(struct noise_remote *r, const uint8_t psk[NOISE_PSK_SIZE])
|
||||
{
|
||||
int same;
|
||||
rw_enter_write(&r->r_handshake_lock);
|
||||
same = !timingsafe_bcmp(r->r_psk, psk, NOISE_PSK_SIZE);
|
||||
if (!same) {
|
||||
memcpy(r->r_psk, psk, NOISE_PSK_SIZE);
|
||||
}
|
||||
rw_exit_write(&r->r_handshake_lock);
|
||||
return same ? EEXIST : 0;
|
||||
}
|
||||
|
||||
int
|
||||
noise_remote_keys(struct noise_remote *r, uint8_t public[NOISE_KEY_SIZE],
|
||||
uint8_t psk[NOISE_PSK_SIZE])
|
||||
{
|
||||
static uint8_t null_psk[NOISE_PSK_SIZE];
|
||||
int ret;
|
||||
|
||||
if (public != NULL)
|
||||
memcpy(public, r->r_public, NOISE_KEY_SIZE);
|
||||
|
||||
rw_enter_read(&r->r_handshake_lock);
|
||||
if (psk != NULL)
|
||||
memcpy(psk, r->r_psk, NOISE_PSK_SIZE);
|
||||
ret = timingsafe_bcmp(r->r_psk, null_psk, NOISE_PSK_SIZE);
|
||||
rw_exit_read(&r->r_handshake_lock);
|
||||
|
||||
/* If r_psk != null_psk return 0, else ENOENT (no psk) */
|
||||
return ret ? 0 : ENOENT;
|
||||
}
|
||||
|
||||
void
|
||||
noise_remote_precompute(struct noise_remote *r)
|
||||
{
|
||||
struct noise_local *l = r->r_local;
|
||||
if (!l->l_has_identity)
|
||||
bzero(r->r_ss, NOISE_KEY_SIZE);
|
||||
else if (!curve25519(r->r_ss, l->l_private, r->r_public))
|
||||
bzero(r->r_ss, NOISE_KEY_SIZE);
|
||||
|
||||
rw_enter_write(&r->r_handshake_lock);
|
||||
noise_remote_handshake_index_drop(r);
|
||||
explicit_bzero(&r->r_handshake, sizeof(r->r_handshake));
|
||||
rw_exit_write(&r->r_handshake_lock);
|
||||
}
|
||||
|
||||
/* Handshake functions */
|
||||
int
|
||||
noise_create_initiation(struct noise_remote *r, struct noise_initiation *init)
|
||||
{
|
||||
struct noise_handshake *hs = &r->r_handshake;
|
||||
struct noise_local *l = r->r_local;
|
||||
uint8_t key[NOISE_SYMMETRIC_SIZE];
|
||||
int ret = EINVAL;
|
||||
|
||||
rw_enter_read(&l->l_identity_lock);
|
||||
rw_enter_write(&r->r_handshake_lock);
|
||||
if (!l->l_has_identity)
|
||||
goto error;
|
||||
noise_param_init(hs->hs_ck, hs->hs_hash, r->r_public);
|
||||
|
||||
/* e */
|
||||
curve25519_generate_secret(hs->hs_e);
|
||||
if (curve25519_generate_public(init->ue, hs->hs_e) == 0)
|
||||
goto error;
|
||||
noise_msg_ephemeral(hs->hs_ck, hs->hs_hash, init->ue);
|
||||
|
||||
/* es */
|
||||
if (noise_mix_dh(hs->hs_ck, key, hs->hs_e, r->r_public) != 0)
|
||||
goto error;
|
||||
|
||||
/* s */
|
||||
noise_msg_encrypt(init->es, l->l_public,
|
||||
NOISE_KEY_SIZE, key, hs->hs_hash);
|
||||
|
||||
/* ss */
|
||||
if (noise_mix_ss(hs->hs_ck, key, r->r_ss) != 0)
|
||||
goto error;
|
||||
|
||||
/* {t} */
|
||||
noise_tai64n_now(init->ets);
|
||||
noise_msg_encrypt(init->ets, init->ets,
|
||||
NOISE_TIMESTAMP_SIZE, key, hs->hs_hash);
|
||||
|
||||
noise_remote_handshake_index_drop(r);
|
||||
hs->hs_state = CREATED_INITIATION;
|
||||
hs->hs_local_index = noise_remote_handshake_index_get(r);
|
||||
init->s_idx = hs->hs_local_index;
|
||||
ret = 0;
|
||||
error:
|
||||
rw_exit_write(&r->r_handshake_lock);
|
||||
rw_exit_read(&l->l_identity_lock);
|
||||
if (ret != 0)
|
||||
explicit_bzero(init, sizeof(*init));
|
||||
explicit_bzero(key, NOISE_SYMMETRIC_SIZE);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int
|
||||
noise_consume_initiation(struct noise_local *l, struct noise_remote **rp,
|
||||
struct noise_initiation *init)
|
||||
{
|
||||
struct noise_remote *r;
|
||||
struct noise_handshake hs;
|
||||
uint8_t key[NOISE_SYMMETRIC_SIZE];
|
||||
uint8_t r_public[NOISE_KEY_SIZE];
|
||||
uint8_t timestamp[NOISE_TIMESTAMP_SIZE];
|
||||
int ret = EINVAL;
|
||||
|
||||
rw_enter_read(&l->l_identity_lock);
|
||||
if (!l->l_has_identity)
|
||||
goto error;
|
||||
noise_param_init(hs.hs_ck, hs.hs_hash, l->l_public);
|
||||
|
||||
/* e */
|
||||
noise_msg_ephemeral(hs.hs_ck, hs.hs_hash, init->ue);
|
||||
|
||||
/* es */
|
||||
if (noise_mix_dh(hs.hs_ck, key, l->l_private, init->ue) != 0)
|
||||
goto error;
|
||||
|
||||
/* s */
|
||||
if (noise_msg_decrypt(r_public, init->es,
|
||||
NOISE_KEY_SIZE + NOISE_MAC_SIZE, key, hs.hs_hash) != 0)
|
||||
goto error;
|
||||
|
||||
/* Lookup the remote we received from */
|
||||
if ((r = l->l_upcall.u_remote_get(l->l_upcall.u_arg, r_public)) == NULL)
|
||||
goto error;
|
||||
|
||||
/* ss */
|
||||
if (noise_mix_ss(hs.hs_ck, key, r->r_ss) != 0)
|
||||
goto error;
|
||||
|
||||
/* {t} */
|
||||
if (noise_msg_decrypt(timestamp, init->ets,
|
||||
NOISE_TIMESTAMP_SIZE + NOISE_MAC_SIZE, key, hs.hs_hash) != 0)
|
||||
goto error;
|
||||
|
||||
hs.hs_state = CONSUMED_INITIATION;
|
||||
hs.hs_local_index = 0;
|
||||
hs.hs_remote_index = init->s_idx;
|
||||
memcpy(hs.hs_e, init->ue, NOISE_KEY_SIZE);
|
||||
|
||||
/* We have successfully computed the same results, now we ensure that
|
||||
* this is not an initiation replay, or a flood attack */
|
||||
rw_enter_write(&r->r_handshake_lock);
|
||||
|
||||
/* Replay */
|
||||
if (memcmp(timestamp, r->r_timestamp, NOISE_TIMESTAMP_SIZE) > 0)
|
||||
memcpy(r->r_timestamp, timestamp, NOISE_TIMESTAMP_SIZE);
|
||||
else
|
||||
goto error_set;
|
||||
/* Flood attack */
|
||||
if (noise_timer_expired(&r->r_last_init, 0, REJECT_INTERVAL))
|
||||
getnanouptime(&r->r_last_init);
|
||||
else
|
||||
goto error_set;
|
||||
|
||||
/* Ok, we're happy to accept this initiation now */
|
||||
noise_remote_handshake_index_drop(r);
|
||||
r->r_handshake = hs;
|
||||
*rp = r;
|
||||
ret = 0;
|
||||
error_set:
|
||||
rw_exit_write(&r->r_handshake_lock);
|
||||
error:
|
||||
rw_exit_read(&l->l_identity_lock);
|
||||
explicit_bzero(key, NOISE_SYMMETRIC_SIZE);
|
||||
explicit_bzero(&hs, sizeof(hs));
|
||||
return ret;
|
||||
}
|
||||
|
||||
int
|
||||
noise_create_response(struct noise_remote *r, struct noise_response *resp)
|
||||
{
|
||||
struct noise_handshake *hs = &r->r_handshake;
|
||||
uint8_t key[NOISE_SYMMETRIC_SIZE];
|
||||
uint8_t e[NOISE_KEY_SIZE];
|
||||
int ret = EINVAL;
|
||||
|
||||
rw_enter_read(&r->r_local->l_identity_lock);
|
||||
rw_enter_write(&r->r_handshake_lock);
|
||||
|
||||
if (hs->hs_state != CONSUMED_INITIATION)
|
||||
goto error;
|
||||
|
||||
/* e */
|
||||
curve25519_generate_secret(e);
|
||||
if (curve25519_generate_public(resp->ue, e) == 0)
|
||||
goto error;
|
||||
noise_msg_ephemeral(hs->hs_ck, hs->hs_hash, resp->ue);
|
||||
|
||||
/* ee */
|
||||
if (noise_mix_dh(hs->hs_ck, NULL, e, hs->hs_e) != 0)
|
||||
goto error;
|
||||
|
||||
/* se */
|
||||
if (noise_mix_dh(hs->hs_ck, NULL, e, r->r_public) != 0)
|
||||
goto error;
|
||||
|
||||
/* psk */
|
||||
noise_mix_psk(hs->hs_ck, hs->hs_hash, key, r->r_psk);
|
||||
|
||||
/* {} */
|
||||
noise_msg_encrypt(resp->en, NULL, 0, key, hs->hs_hash);
|
||||
|
||||
hs->hs_state = CREATED_RESPONSE;
|
||||
hs->hs_local_index = noise_remote_handshake_index_get(r);
|
||||
resp->r_idx = hs->hs_remote_index;
|
||||
resp->s_idx = hs->hs_local_index;
|
||||
ret = 0;
|
||||
error:
|
||||
rw_exit_write(&r->r_handshake_lock);
|
||||
rw_exit_read(&r->r_local->l_identity_lock);
|
||||
if (ret != 0)
|
||||
explicit_bzero(resp, sizeof(*resp));
|
||||
explicit_bzero(key, NOISE_SYMMETRIC_SIZE);
|
||||
explicit_bzero(e, NOISE_KEY_SIZE);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int
|
||||
noise_consume_response(struct noise_remote *r, struct noise_response *resp)
|
||||
{
|
||||
struct noise_local *l = r->r_local;
|
||||
struct noise_handshake hs;
|
||||
uint8_t key[NOISE_SYMMETRIC_SIZE];
|
||||
uint8_t preshared_key[NOISE_KEY_SIZE];
|
||||
int ret = EINVAL;
|
||||
|
||||
rw_enter_read(&l->l_identity_lock);
|
||||
if (!l->l_has_identity)
|
||||
goto error;
|
||||
|
||||
rw_enter_read(&r->r_handshake_lock);
|
||||
hs = r->r_handshake;
|
||||
memcpy(preshared_key, r->r_psk, NOISE_PSK_SIZE);
|
||||
rw_exit_read(&r->r_handshake_lock);
|
||||
|
||||
if (hs.hs_state != CREATED_INITIATION ||
|
||||
hs.hs_local_index != resp->r_idx)
|
||||
goto error;
|
||||
|
||||
/* e */
|
||||
noise_msg_ephemeral(hs.hs_ck, hs.hs_hash, resp->ue);
|
||||
|
||||
/* ee */
|
||||
if (noise_mix_dh(hs.hs_ck, NULL, hs.hs_e, resp->ue) != 0)
|
||||
goto error;
|
||||
|
||||
/* se */
|
||||
if (noise_mix_dh(hs.hs_ck, NULL, l->l_private, resp->ue) != 0)
|
||||
goto error;
|
||||
|
||||
/* psk */
|
||||
noise_mix_psk(hs.hs_ck, hs.hs_hash, key, preshared_key);
|
||||
|
||||
/* {} */
|
||||
if (noise_msg_decrypt(NULL, resp->en,
|
||||
0 + NOISE_MAC_SIZE, key, hs.hs_hash) != 0)
|
||||
goto error;
|
||||
|
||||
hs.hs_remote_index = resp->s_idx;
|
||||
|
||||
rw_enter_write(&r->r_handshake_lock);
|
||||
if (r->r_handshake.hs_state == hs.hs_state &&
|
||||
r->r_handshake.hs_local_index == hs.hs_local_index) {
|
||||
r->r_handshake = hs;
|
||||
r->r_handshake.hs_state = CONSUMED_RESPONSE;
|
||||
ret = 0;
|
||||
}
|
||||
rw_exit_write(&r->r_handshake_lock);
|
||||
error:
|
||||
rw_exit_read(&l->l_identity_lock);
|
||||
explicit_bzero(&hs, sizeof(hs));
|
||||
explicit_bzero(key, NOISE_SYMMETRIC_SIZE);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int
|
||||
noise_remote_begin_session(struct noise_remote *r)
|
||||
{
|
||||
struct noise_handshake *hs = &r->r_handshake;
|
||||
struct noise_keypair kp, *next, *current, *previous;
|
||||
|
||||
rw_enter_write(&r->r_handshake_lock);
|
||||
|
||||
/* We now derive the keypair from the handshake */
|
||||
if (hs->hs_state == CONSUMED_RESPONSE) {
|
||||
kp.kp_is_initiator = 1;
|
||||
noise_kdf(kp.kp_send, kp.kp_recv, NULL, NULL,
|
||||
NOISE_SYMMETRIC_SIZE, NOISE_SYMMETRIC_SIZE, 0, 0,
|
||||
hs->hs_ck);
|
||||
} else if (hs->hs_state == CREATED_RESPONSE) {
|
||||
kp.kp_is_initiator = 0;
|
||||
noise_kdf(kp.kp_recv, kp.kp_send, NULL, NULL,
|
||||
NOISE_SYMMETRIC_SIZE, NOISE_SYMMETRIC_SIZE, 0, 0,
|
||||
hs->hs_ck);
|
||||
} else {
|
||||
rw_exit_write(&r->r_keypair_lock);
|
||||
return EINVAL;
|
||||
}
|
||||
|
||||
kp.kp_valid = 1;
|
||||
kp.kp_local_index = hs->hs_local_index;
|
||||
kp.kp_remote_index = hs->hs_remote_index;
|
||||
getnanouptime(&kp.kp_birthdate);
|
||||
bzero(&kp.kp_ctr, sizeof(kp.kp_ctr));
|
||||
rw_init(&kp.kp_ctr.c_lock, "noise_counter");
|
||||
|
||||
/* Now we need to add_new_keypair */
|
||||
rw_enter_write(&r->r_keypair_lock);
|
||||
next = r->r_next;
|
||||
current = r->r_current;
|
||||
previous = r->r_previous;
|
||||
|
||||
if (kp.kp_is_initiator) {
|
||||
if (next != NULL) {
|
||||
r->r_next = NULL;
|
||||
r->r_previous = next;
|
||||
noise_remote_keypair_free(r, current);
|
||||
} else {
|
||||
r->r_previous = current;
|
||||
}
|
||||
|
||||
noise_remote_keypair_free(r, previous);
|
||||
|
||||
r->r_current = noise_remote_keypair_allocate(r);
|
||||
*r->r_current = kp;
|
||||
} else {
|
||||
noise_remote_keypair_free(r, next);
|
||||
r->r_previous = NULL;
|
||||
noise_remote_keypair_free(r, previous);
|
||||
|
||||
r->r_next = noise_remote_keypair_allocate(r);
|
||||
*r->r_next = kp;
|
||||
}
|
||||
rw_exit_write(&r->r_keypair_lock);
|
||||
|
||||
explicit_bzero(&r->r_handshake, sizeof(r->r_handshake));
|
||||
rw_exit_write(&r->r_handshake_lock);
|
||||
|
||||
explicit_bzero(&kp, sizeof(kp));
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
noise_remote_clear(struct noise_remote *r)
|
||||
{
|
||||
rw_enter_write(&r->r_handshake_lock);
|
||||
noise_remote_handshake_index_drop(r);
|
||||
explicit_bzero(&r->r_handshake, sizeof(r->r_handshake));
|
||||
rw_exit_write(&r->r_handshake_lock);
|
||||
|
||||
rw_enter_write(&r->r_keypair_lock);
|
||||
noise_remote_keypair_free(r, r->r_next);
|
||||
noise_remote_keypair_free(r, r->r_current);
|
||||
noise_remote_keypair_free(r, r->r_previous);
|
||||
rw_exit_write(&r->r_keypair_lock);
|
||||
}
|
||||
|
||||
void
|
||||
noise_remote_expire_current(struct noise_remote *r)
|
||||
{
|
||||
rw_enter_write(&r->r_keypair_lock);
|
||||
if (r->r_next != NULL)
|
||||
r->r_next->kp_valid = 0;
|
||||
if (r->r_current != NULL)
|
||||
r->r_current->kp_valid = 0;
|
||||
rw_exit_write(&r->r_keypair_lock);
|
||||
}
|
||||
|
||||
int
|
||||
noise_remote_ready(struct noise_remote *r)
|
||||
{
|
||||
struct noise_keypair *kp;
|
||||
int ret;
|
||||
|
||||
rw_enter_read(&r->r_keypair_lock);
|
||||
/* kp_ctr isn't locked here, we're happy to accept a racy read. */
|
||||
if ((kp = r->r_current) == NULL ||
|
||||
!kp->kp_valid ||
|
||||
noise_timer_expired(&kp->kp_birthdate, REJECT_AFTER_TIME, 0) ||
|
||||
kp->kp_ctr.c_recv >= REJECT_AFTER_MESSAGES ||
|
||||
kp->kp_ctr.c_send >= REJECT_AFTER_MESSAGES)
|
||||
ret = EINVAL;
|
||||
else
|
||||
ret = 0;
|
||||
rw_exit_read(&r->r_keypair_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int
|
||||
noise_remote_encrypt(struct noise_remote *r, struct noise_data *data,
|
||||
size_t len)
|
||||
{
|
||||
struct noise_keypair *kp;
|
||||
uint64_t ctr;
|
||||
int ret = EINVAL;
|
||||
|
||||
rw_enter_read(&r->r_keypair_lock);
|
||||
if ((kp = r->r_current) == NULL)
|
||||
goto error;
|
||||
|
||||
/* We confirm that our values are within our tolerances. We want:
|
||||
* - a valid keypair
|
||||
* - our keypair to be less than REJECT_AFTER_TIME seconds old
|
||||
* - our receive counter to be less than REJECT_AFTER_MESSAGES
|
||||
* - our send counter to be less than REJECT_AFTER_MESSAGES
|
||||
*
|
||||
* kp_ctr isn't locked here, we're happy to accept a racy read. */
|
||||
if (!kp->kp_valid ||
|
||||
noise_timer_expired(&kp->kp_birthdate, REJECT_AFTER_TIME, 0) ||
|
||||
kp->kp_ctr.c_recv >= REJECT_AFTER_MESSAGES ||
|
||||
((ctr = noise_counter_send(&kp->kp_ctr)) > REJECT_AFTER_MESSAGES))
|
||||
goto error;
|
||||
|
||||
/* Ensure that our counter is little endian and then encrypt our
|
||||
* payload. We encrypt into the same buffer, so the caller must ensure
|
||||
* that buf has NOISE_MAC_SIZE bytes to store the MAC. The nonce and
|
||||
* index are passed back out to the caller through the provided
|
||||
* data pointer. */
|
||||
data->nonce = htole64(ctr);
|
||||
data->r_idx = kp->kp_remote_index;
|
||||
chacha20poly1305_encrypt(data->buf, data->buf, len,
|
||||
NULL, 0, data->nonce, kp->kp_send);
|
||||
|
||||
/* If our values are still within tolerances, but we are approaching
|
||||
* the tolerances, we notify the caller with ESTALE that they should
|
||||
* establish a new keypair. The current keypair can continue to be used
|
||||
* until the tolerances are hit. We notify if:
|
||||
* - our send counter is not less than REKEY_AFTER_MESSAGES
|
||||
* - we're the initiator and our keypair is older than
|
||||
* REKEY_AFTER_TIME seconds */
|
||||
ret = ESTALE;
|
||||
if (ctr >= REKEY_AFTER_MESSAGES)
|
||||
goto error;
|
||||
if (kp->kp_is_initiator &&
|
||||
noise_timer_expired(&kp->kp_birthdate, REKEY_AFTER_TIME, 0))
|
||||
goto error;
|
||||
|
||||
ret = 0;
|
||||
error:
|
||||
rw_exit_read(&r->r_keypair_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int
|
||||
noise_remote_decrypt(struct noise_remote *r, struct noise_data *data,
|
||||
size_t len)
|
||||
{
|
||||
struct noise_keypair *kp;
|
||||
uint64_t ctr;
|
||||
int ret = EINVAL;
|
||||
|
||||
/* We retrieve the keypair corresponding to the provided index. We
|
||||
* attempt the current keypair first as that is most likely. We also
|
||||
* want to make sure that the keypair is valid as it would be
|
||||
* catastrophic to decrypt against a zero'ed keypair. */
|
||||
rw_enter_read(&r->r_keypair_lock);
|
||||
|
||||
if (r->r_current != NULL && r->r_current->kp_local_index == data->r_idx) {
|
||||
kp = r->r_current;
|
||||
} else if (r->r_previous != NULL && r->r_previous->kp_local_index == data->r_idx) {
|
||||
kp = r->r_previous;
|
||||
} else if (r->r_next != NULL && r->r_next->kp_local_index == data->r_idx) {
|
||||
kp = r->r_next;
|
||||
} else {
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* We confirm that our values are within our tolerances. These values
|
||||
* are the same as the encrypt routine.
|
||||
*
|
||||
* kp_ctr isn't locked here, we're happy to accept a racy read. */
|
||||
if (noise_timer_expired(&kp->kp_birthdate, REJECT_AFTER_TIME, 0) ||
|
||||
kp->kp_ctr.c_send >= REJECT_AFTER_MESSAGES ||
|
||||
kp->kp_ctr.c_recv >= REJECT_AFTER_MESSAGES)
|
||||
goto error;
|
||||
|
||||
/* Ensure we've got the counter in host byte order, then decrypt,
|
||||
* then validate the counter. We don't want to validate the counter
|
||||
* before decrypting as we do not know the message is authentic prior
|
||||
* to decryption. */
|
||||
ctr = letoh64(data->nonce);
|
||||
|
||||
if (chacha20poly1305_decrypt(data->buf, data->buf, len,
|
||||
NULL, 0, data->nonce, kp->kp_recv) == 0)
|
||||
goto error;
|
||||
|
||||
if (noise_counter_recv(&kp->kp_ctr, ctr) != 0)
|
||||
goto error;
|
||||
|
||||
/* If we've received the handshake confirming data packet then move the
|
||||
* next keypair into current. If we do slide the next keypair in, then
|
||||
* we skip the REKEY_AFTER_TIME_RECV check. This is safe to do as a
|
||||
* data packet can't confirm a session that we are an INITIATOR of. */
|
||||
if (kp == r->r_next) {
|
||||
rw_exit_read(&r->r_keypair_lock);
|
||||
rw_enter_write(&r->r_keypair_lock);
|
||||
if (kp == r->r_next && kp->kp_local_index == data->r_idx) {
|
||||
noise_remote_keypair_free(r, r->r_previous);
|
||||
r->r_previous = r->r_current;
|
||||
r->r_current = r->r_next;
|
||||
r->r_next = NULL;
|
||||
|
||||
ret = ECONNRESET;
|
||||
goto error;
|
||||
}
|
||||
rw_downgrade(&r->r_keypair_lock);
|
||||
}
|
||||
|
||||
/* Similar to when we encrypt, we want to notify the caller when we
|
||||
* are approaching our tolerances. We notify if:
|
||||
* - we're the initiator and the current keypair is older than
|
||||
* REKEY_AFTER_TIME_RECV seconds. */
|
||||
ret = ESTALE;
|
||||
kp = r->r_current;
|
||||
if (kp->kp_is_initiator &&
|
||||
noise_timer_expired(&kp->kp_birthdate, REKEY_AFTER_TIME_RECV, 0))
|
||||
goto error;
|
||||
|
||||
ret = 0;
|
||||
|
||||
error:
|
||||
rw_exit(&r->r_keypair_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Private functions - these should not be called outside this file under any
|
||||
* circumstances. */
|
||||
static struct noise_keypair *
|
||||
noise_remote_keypair_allocate(struct noise_remote *r)
|
||||
{
|
||||
struct noise_keypair *kp;
|
||||
kp = SLIST_FIRST(&r->r_unused_keypairs);
|
||||
SLIST_REMOVE_HEAD(&r->r_unused_keypairs, kp_entry);
|
||||
return kp;
|
||||
}
|
||||
|
||||
static void
|
||||
noise_remote_keypair_free(struct noise_remote *r, struct noise_keypair *kp)
|
||||
{
|
||||
struct noise_upcall *u = &r->r_local->l_upcall;
|
||||
if (kp != NULL) {
|
||||
SLIST_INSERT_HEAD(&r->r_unused_keypairs, kp, kp_entry);
|
||||
u->u_index_drop(u->u_arg, kp->kp_local_index);
|
||||
bzero(kp->kp_send, sizeof(kp->kp_send));
|
||||
bzero(kp->kp_recv, sizeof(kp->kp_recv));
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
noise_remote_handshake_index_get(struct noise_remote *r)
|
||||
{
|
||||
struct noise_upcall *u = &r->r_local->l_upcall;
|
||||
return u->u_index_set(u->u_arg, r);
|
||||
}
|
||||
|
||||
static void
|
||||
noise_remote_handshake_index_drop(struct noise_remote *r)
|
||||
{
|
||||
struct noise_handshake *hs = &r->r_handshake;
|
||||
struct noise_upcall *u = &r->r_local->l_upcall;
|
||||
|
||||
rw_assert(&r->r_handshake_lock, RA_WLOCKED);
|
||||
if (hs->hs_state != HS_ZEROED)
|
||||
u->u_index_drop(u->u_arg, hs->hs_local_index);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
noise_counter_send(struct noise_counter *ctr)
|
||||
{
|
||||
uint64_t ret;
|
||||
rw_enter_write(&ctr->c_lock);
|
||||
ret = ctr->c_send++;
|
||||
rw_exit_write(&ctr->c_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
noise_counter_recv(struct noise_counter *ctr, uint64_t recv)
|
||||
{
|
||||
uint64_t i, top, index_recv, index_ctr;
|
||||
COUNTER_TYPE bit;
|
||||
int ret = EEXIST;
|
||||
|
||||
rw_enter_write(&ctr->c_lock);
|
||||
|
||||
/* Check that the recv counter is valid */
|
||||
if (ctr->c_recv >= REJECT_AFTER_MESSAGES ||
|
||||
recv >= REJECT_AFTER_MESSAGES)
|
||||
goto error;
|
||||
|
||||
/* If the packet is out of the window, invalid */
|
||||
if (recv + COUNTER_WINDOW_SIZE < ctr->c_recv)
|
||||
goto error;
|
||||
|
||||
/* If the new counter is ahead of the current counter, we'll need to
|
||||
* zero out the bitmap that has previously been used */
|
||||
index_recv = recv / COUNTER_TYPE_BITS;
|
||||
index_ctr = ctr->c_recv / COUNTER_TYPE_BITS;
|
||||
|
||||
if (recv > ctr->c_recv) {
|
||||
top = MIN(index_recv - index_ctr, COUNTER_TYPE_NUM);
|
||||
for (i = 1; i <= top; i++)
|
||||
ctr->c_backtrack[
|
||||
(i + index_ctr) & (COUNTER_TYPE_NUM - 1)] = 0;
|
||||
ctr->c_recv = recv;
|
||||
}
|
||||
|
||||
index_recv %= COUNTER_TYPE_NUM;
|
||||
bit = ((COUNTER_TYPE)1) << (recv % COUNTER_TYPE_BITS);
|
||||
|
||||
if (ctr->c_backtrack[index_recv] & bit)
|
||||
goto error;
|
||||
|
||||
ctr->c_backtrack[index_recv] |= bit;
|
||||
|
||||
ret = 0;
|
||||
error:
|
||||
rw_exit_write(&ctr->c_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
noise_kdf(uint8_t *a, uint8_t *b, uint8_t *c, const uint8_t *x,
|
||||
size_t a_len, size_t b_len, size_t c_len, size_t x_len,
|
||||
const uint8_t ck[NOISE_HASH_SIZE])
|
||||
{
|
||||
uint8_t out[BLAKE2S_HASH_SIZE + 1];
|
||||
uint8_t sec[BLAKE2S_HASH_SIZE];
|
||||
|
||||
ASSERT(a_len <= BLAKE2S_HASH_SIZE && b_len <= BLAKE2S_HASH_SIZE &&
|
||||
c_len <= BLAKE2S_HASH_SIZE);
|
||||
ASSERT(!(b || b_len || c || c_len) || (a && a_len));
|
||||
ASSERT(!(c || c_len) || (b && b_len));
|
||||
|
||||
/* Extract entropy from "x" into sec */
|
||||
blake2s_hmac(sec, x, ck, BLAKE2S_HASH_SIZE, x_len, NOISE_HASH_SIZE);
|
||||
|
||||
if (a == NULL || a_len == 0)
|
||||
goto out;
|
||||
|
||||
/* Expand first key: key = sec, data = 0x1 */
|
||||
out[0] = 1;
|
||||
blake2s_hmac(out, out, sec, BLAKE2S_HASH_SIZE, 1, BLAKE2S_HASH_SIZE);
|
||||
memcpy(a, out, a_len);
|
||||
|
||||
if (b == NULL || b_len == 0)
|
||||
goto out;
|
||||
|
||||
/* Expand second key: key = sec, data = "a" || 0x2 */
|
||||
out[BLAKE2S_HASH_SIZE] = 2;
|
||||
blake2s_hmac(out, out, sec, BLAKE2S_HASH_SIZE, BLAKE2S_HASH_SIZE + 1,
|
||||
BLAKE2S_HASH_SIZE);
|
||||
memcpy(b, out, b_len);
|
||||
|
||||
if (c == NULL || c_len == 0)
|
||||
goto out;
|
||||
|
||||
/* Expand third key: key = sec, data = "b" || 0x3 */
|
||||
out[BLAKE2S_HASH_SIZE] = 3;
|
||||
blake2s_hmac(out, out, sec, BLAKE2S_HASH_SIZE, BLAKE2S_HASH_SIZE + 1,
|
||||
BLAKE2S_HASH_SIZE);
|
||||
memcpy(c, out, c_len);
|
||||
|
||||
out:
|
||||
/* Clear sensitive data from stack */
|
||||
explicit_bzero(sec, BLAKE2S_HASH_SIZE);
|
||||
explicit_bzero(out, BLAKE2S_HASH_SIZE + 1);
|
||||
}
|
||||
|
||||
static int
|
||||
noise_mix_dh(uint8_t ck[NOISE_HASH_SIZE], uint8_t key[NOISE_SYMMETRIC_SIZE],
|
||||
const uint8_t private[NOISE_KEY_SIZE],
|
||||
const uint8_t public[NOISE_KEY_SIZE])
|
||||
{
|
||||
uint8_t dh[NOISE_KEY_SIZE];
|
||||
|
||||
if (!curve25519(dh, private, public))
|
||||
return EINVAL;
|
||||
noise_kdf(ck, key, NULL, dh,
|
||||
NOISE_HASH_SIZE, NOISE_SYMMETRIC_SIZE, 0, NOISE_KEY_SIZE, ck);
|
||||
explicit_bzero(dh, NOISE_KEY_SIZE);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
noise_mix_ss(uint8_t ck[NOISE_HASH_SIZE], uint8_t key[NOISE_SYMMETRIC_SIZE],
|
||||
const uint8_t ss[NOISE_KEY_SIZE])
|
||||
{
|
||||
static uint8_t null_point[NOISE_KEY_SIZE];
|
||||
if (timingsafe_bcmp(ss, null_point, NOISE_KEY_SIZE) == 0)
|
||||
return ENOENT;
|
||||
noise_kdf(ck, key, NULL, ss,
|
||||
NOISE_HASH_SIZE, NOISE_SYMMETRIC_SIZE, 0, NOISE_KEY_SIZE, ck);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
noise_mix_hash(uint8_t hash[NOISE_HASH_SIZE], const uint8_t *src,
|
||||
size_t src_len)
|
||||
{
|
||||
struct blake2s_state blake;
|
||||
|
||||
blake2s_init(&blake, NOISE_HASH_SIZE);
|
||||
blake2s_update(&blake, hash, NOISE_HASH_SIZE);
|
||||
blake2s_update(&blake, src, src_len);
|
||||
blake2s_final(&blake, hash, NOISE_HASH_SIZE);
|
||||
}
|
||||
|
||||
static void
|
||||
noise_mix_psk(uint8_t ck[NOISE_HASH_SIZE], uint8_t hash[NOISE_HASH_SIZE],
|
||||
uint8_t key[NOISE_SYMMETRIC_SIZE], const uint8_t psk[NOISE_KEY_SIZE])
|
||||
{
|
||||
uint8_t tmp[NOISE_HASH_SIZE];
|
||||
|
||||
noise_kdf(ck, tmp, key, psk,
|
||||
NOISE_HASH_SIZE, NOISE_HASH_SIZE, NOISE_SYMMETRIC_SIZE,
|
||||
NOISE_PSK_SIZE, ck);
|
||||
noise_mix_hash(hash, tmp, NOISE_HASH_SIZE);
|
||||
explicit_bzero(tmp, NOISE_HASH_SIZE);
|
||||
}
|
||||
|
||||
static void
|
||||
noise_param_init(uint8_t ck[NOISE_HASH_SIZE], uint8_t hash[NOISE_HASH_SIZE],
|
||||
const uint8_t s[NOISE_KEY_SIZE])
|
||||
{
|
||||
struct blake2s_state blake;
|
||||
|
||||
blake2s(ck, (uint8_t *)NOISE_HANDSHAKE_NAME, NULL,
|
||||
NOISE_HASH_SIZE, strlen(NOISE_HANDSHAKE_NAME), 0);
|
||||
blake2s_init(&blake, NOISE_HASH_SIZE);
|
||||
blake2s_update(&blake, ck, NOISE_HASH_SIZE);
|
||||
blake2s_update(&blake, (uint8_t *)NOISE_IDENTIFIER_NAME,
|
||||
strlen(NOISE_IDENTIFIER_NAME));
|
||||
blake2s_final(&blake, hash, NOISE_HASH_SIZE);
|
||||
|
||||
noise_mix_hash(hash, s, NOISE_KEY_SIZE);
|
||||
}
|
||||
|
||||
static void
|
||||
noise_msg_encrypt(uint8_t *dst, const uint8_t *src, size_t src_len,
|
||||
uint8_t key[NOISE_SYMMETRIC_SIZE], uint8_t hash[NOISE_HASH_SIZE])
|
||||
{
|
||||
/* Nonce always zero for Noise_IK */
|
||||
chacha20poly1305_encrypt(dst, src, src_len,
|
||||
hash, NOISE_HASH_SIZE, 0, key);
|
||||
noise_mix_hash(hash, dst, src_len + NOISE_MAC_SIZE);
|
||||
}
|
||||
|
||||
static int
|
||||
noise_msg_decrypt(uint8_t *dst, const uint8_t *src, size_t src_len,
|
||||
uint8_t key[NOISE_SYMMETRIC_SIZE], uint8_t hash[NOISE_HASH_SIZE])
|
||||
{
|
||||
/* Nonce always zero for Noise_IK */
|
||||
if (!chacha20poly1305_decrypt(dst, src, src_len,
|
||||
hash, NOISE_HASH_SIZE, 0, key))
|
||||
return EINVAL;
|
||||
noise_mix_hash(hash, src, src_len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
noise_msg_ephemeral(uint8_t ck[NOISE_HASH_SIZE], uint8_t hash[NOISE_HASH_SIZE],
|
||||
const uint8_t src[NOISE_KEY_SIZE])
|
||||
{
|
||||
noise_mix_hash(hash, src, NOISE_KEY_SIZE);
|
||||
noise_kdf(ck, NULL, NULL, src, NOISE_HASH_SIZE, 0, 0, NOISE_KEY_SIZE, ck);
|
||||
}
|
||||
|
||||
static void
|
||||
noise_tai64n_now(uint8_t output[NOISE_TIMESTAMP_SIZE])
|
||||
{
|
||||
struct timespec time;
|
||||
|
||||
getnanotime(&time);
|
||||
|
||||
/* Round down the nsec counter to limit precise timing leak. */
|
||||
time.tv_nsec &= REJECT_INTERVAL_MASK;
|
||||
|
||||
/* https://cr.yp.to/libtai/tai64.html */
|
||||
*(uint64_t *)output = htobe64(0x400000000000000aULL + time.tv_sec);
|
||||
*(uint32_t *)(output + sizeof(uint64_t)) = htobe32(time.tv_nsec);
|
||||
}
|
||||
|
||||
static int
|
||||
noise_timer_expired(struct timespec *birthdate, time_t sec, long nsec)
|
||||
{
|
||||
struct timespec uptime;
|
||||
struct timespec expire = { .tv_sec = sec, .tv_nsec = nsec };
|
||||
|
||||
/* We don't really worry about a zeroed birthdate, to avoid the extra
|
||||
* check on every encrypt/decrypt. This does mean that r_last_init
|
||||
* check may fail if getnanouptime is < REJECT_INTERVAL from 0. */
|
||||
|
||||
getnanouptime(&uptime);
|
||||
timespecadd(birthdate, &expire, &expire);
|
||||
return timespeccmp(&uptime, &expire, >) ? ETIMEDOUT : 0;
|
||||
}
|
@ -817,3 +817,16 @@ void
|
||||
taskqgroup_destroy(struct taskqgroup *qgroup)
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
taskqgroup_drain_all(struct taskqgroup *tqg)
|
||||
{
|
||||
struct gtaskqueue *q;
|
||||
|
||||
for (int i = 0; i < mp_ncpus; i++) {
|
||||
q = tqg->tqg_queue[i].tgc_taskq;
|
||||
if (q == NULL)
|
||||
continue;
|
||||
gtaskqueue_drain_all(q);
|
||||
}
|
||||
}
|
||||
|
@ -162,6 +162,7 @@ SUBDIR= \
|
||||
if_tuntap \
|
||||
if_vlan \
|
||||
if_vxlan \
|
||||
if_wg \
|
||||
iflib \
|
||||
${_iir} \
|
||||
imgact_binmisc \
|
||||
|
41
sys/modules/if_wg/Makefile
Normal file
41
sys/modules/if_wg/Makefile
Normal file
@ -0,0 +1,41 @@
|
||||
# $FreeBSD$
#
# Build of the if_wg kernel module.

KMOD= if_wg

INCDIR= ${SRCTOP}/sys/dev/if_wg/include
ZINCDIR= ${SRCTOP}/sys/dev/if_wg/module/crypto/zinc

.PATH: ${SRCTOP}/sys/dev/if_wg/module
.PATH: ${ZINCDIR}
.PATH: ${ZINCDIR}/chacha20
.PATH: ${ZINCDIR}/poly1305

CFLAGS+= -I${INCDIR}

# Note: no -ferror-limit here.  It only caps the number of diagnostics
# printed and is a Clang-specific option that other compilers reject.
CFLAGS+= -D__KERNEL__

DEBUG_FLAGS=-g

SRCS= opt_inet.h opt_inet6.h device_if.h bus_if.h ifdi_if.h

SRCS+= if_wg_session.c module.c
SRCS+= wg_noise.c wg_cookie.c
SRCS+= curve25519.c blake2s.c
SRCS+= chacha20poly1305.c chacha20.c poly1305.c

# amd64 additionally builds the assembly ChaCha20/Poly1305 implementations.
.if ${MACHINE_ARCH} == "amd64"
SRCS += poly1305-x86_64.S chacha20-x86_64.S
SIMD_FLAGS = -DCONFIG_AS_SSSE3=1 -DCONFIG_AS_AVX=1 \
-DCONFIG_AS_AVX512=1 -DCONFIG_AS_AVX2=1
.endif
.include <bsd.kmod.mk>

# Per-file flags for the amd64 assembly sources and the C files that
# dispatch to them.
.if ${MACHINE_ARCH} == "amd64"
CFLAGS.poly1305-x86_64.S = -D__LOCORE -gdwarf-4 ${SIMD_FLAGS} -include ${INCDIR}/sys/support.h
CFLAGS.chacha20-x86_64.S = -D__LOCORE -gdwarf-4 ${SIMD_FLAGS} -include ${INCDIR}/sys/support.h
CFLAGS.chacha20poly1305.c = -DCONFIG_ZINC_ARCH_X86_64
CFLAGS.chacha20.c = -DCONFIG_ZINC_ARCH_X86_64
CFLAGS.poly1305.c = -DCONFIG_ZINC_ARCH_X86_64
.endif
|
@ -269,7 +269,6 @@ iflib_clone_register(if_shared_ctx_t sctx)
|
||||
printf("clone_simple failed -- cloned %s devices will not be available\n", sctx->isc_name);
|
||||
goto fail_clone;
|
||||
}
|
||||
ifc_flags_set(ip->ip_ifc, IFC_NOGROUP);
|
||||
ip->ip_lladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event,
|
||||
iflib_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
|
||||
if (ip->ip_lladdr_tag == NULL)
|
||||
|
@ -80,6 +80,7 @@ void taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask);
|
||||
struct taskqgroup *taskqgroup_create(const char *name, int cnt, int stride);
|
||||
void taskqgroup_destroy(struct taskqgroup *qgroup);
|
||||
void taskqgroup_bind(struct taskqgroup *qgroup);
|
||||
void taskqgroup_drain_all(struct taskqgroup *qgroup);
|
||||
|
||||
#define GTASK_INIT(gtask, flags, priority, func, context) do { \
|
||||
(gtask)->ta_flags = flags; \
|
||||
|
Loading…
Reference in New Issue
Block a user