pf: Implement the NAT source port selection of MAP-E Customer Edge

MAP-E (RFC 7597) requires special care when selecting source ports
for NAT on the Customer Edge, because some bits of the port number
are used by the Border Relay to identify the other end of the
IPv4-over-IPv6 tunnel.

PR:		254577
Reviewed by:	kp
Differential Revision:	https://reviews.freebsd.org/D29468
This commit is contained in:
Kurosawa Takahiro 2021-04-13 10:50:00 +02:00 committed by Kristof Provost
parent e6ab1e365c
commit 2aa21096c7
11 changed files with 303 additions and 10 deletions
lib/libpfctl
sbin/pfctl
share/man/man5
sys
tests/sys/netpfil/pf

@ -196,6 +196,18 @@ pf_nvrule_addr_to_rule_addr(const nvlist_t *nvl, struct pf_rule_addr *addr)
addr->port_op = nvlist_get_number(nvl, "port_op");
}
/*
 * Serialise a MAP-E port-set description into a nested nvlist and attach
 * it to nvparent under the key 'name'.
 *
 * The nested list carries the three numeric fields "offset", "psidlen"
 * and "psid", copied verbatim from *mape.
 */
static void
pfctl_nv_add_mape(nvlist_t *nvparent, const char *name,
    const struct pf_mape_portset *mape)
{
	nvlist_t *nvl = nvlist_create(0);

	nvlist_add_number(nvl, "offset", mape->offset);
	nvlist_add_number(nvl, "psidlen", mape->psidlen);
	nvlist_add_number(nvl, "psid", mape->psid);

	/*
	 * nvlist_add_nvlist() stores a copy of nvl in the parent, so the
	 * temporary list is still owned here and must be released, or it
	 * leaks on every call.
	 */
	nvlist_add_nvlist(nvparent, name, nvl);
	nvlist_destroy(nvl);
}
static void
pfctl_nv_add_pool(nvlist_t *nvparent, const char *name,
const struct pfctl_pool *pool)
@ -211,10 +223,19 @@ pfctl_nv_add_pool(nvlist_t *nvparent, const char *name,
ports[1] = pool->proxy_port[1];
nvlist_add_number_array(nvl, "proxy_port", ports, 2);
nvlist_add_number(nvl, "opts", pool->opts);
pfctl_nv_add_mape(nvl, "mape", &pool->mape);
nvlist_add_nvlist(nvparent, name, nvl);
}
/*
 * Fill *mape from the numeric "offset", "psidlen" and "psid" entries of
 * nvl.  The 64-bit nvlist numbers are narrowed to the field widths of
 * struct pf_mape_portset by plain assignment.
 */
static void
pf_nvmape_to_mape(const nvlist_t *nvl, struct pf_mape_portset *mape)
{
	/* Read the entries in the same order as they are stored. */
	const uint64_t nv_offset = nvlist_get_number(nvl, "offset");
	const uint64_t nv_psidlen = nvlist_get_number(nvl, "psidlen");
	const uint64_t nv_psid = nvlist_get_number(nvl, "psid");

	mape->offset = nv_offset;
	mape->psidlen = nv_psidlen;
	mape->psid = nv_psid;
}
static void
pf_nvpool_to_pool(const nvlist_t *nvl, struct pfctl_pool *pool)
{
@ -230,6 +251,9 @@ pf_nvpool_to_pool(const nvlist_t *nvl, struct pfctl_pool *pool)
pool->tblidx = nvlist_get_number(nvl, "tblidx");
pf_nvuint_16_array(nvl, "proxy_port", 2, pool->proxy_port, NULL);
pool->opts = nvlist_get_number(nvl, "opts");
if (nvlist_exists_nvlist(nvl, "mape"))
pf_nvmape_to_mape(nvlist_get_nvlist(nvl, "mape"), &pool->mape);
}
static void

@ -43,6 +43,7 @@ struct pfctl_pool {
struct pf_pooladdr *cur;
struct pf_poolhashkey key;
struct pf_addr counter;
struct pf_mape_portset mape;
int tblidx;
u_int16_t proxy_port[2];
u_int8_t opts;

@ -306,6 +306,7 @@ static struct pool_opts {
int type;
int staticport;
struct pf_poolhashkey *key;
struct pf_mape_portset mape;
} pool_opts;
@ -461,7 +462,7 @@ int parseport(char *, struct range *r, int);
%token SET OPTIMIZATION TIMEOUT LIMIT LOGINTERFACE BLOCKPOLICY FAILPOLICY
%token RANDOMID REQUIREORDER SYNPROXY FINGERPRINTS NOSYNC DEBUG SKIP HOSTID
%token ANTISPOOF FOR INCLUDE
%token BITMASK RANDOM SOURCEHASH ROUNDROBIN STATICPORT PROBABILITY
%token BITMASK RANDOM SOURCEHASH ROUNDROBIN STATICPORT PROBABILITY MAPEPORTSET
%token ALTQ CBQ CODEL PRIQ HFSC FAIRQ BANDWIDTH TBRSIZE LINKSHARE REALTIME
%token UPPERLIMIT QUEUE PRIORITY QLIMIT HOGS BUCKETS RTABLE TARGET INTERVAL
%token LOAD RULESET_OPTIMIZATION PRIO
@ -4015,6 +4016,36 @@ pool_opt : BITMASK {
pool_opts.marker |= POM_STICKYADDRESS;
pool_opts.opts |= PF_POOL_STICKYADDR;
}
| MAPEPORTSET number '/' number '/' number {
if (pool_opts.mape.offset) {
yyerror("map-e-portset cannot be redefined");
YYERROR;
}
if (pool_opts.type) {
yyerror("map-e-portset cannot be used with "
"address pools");
YYERROR;
}
if ($2 <= 0 || $2 >= 16) {
yyerror("MAP-E PSID offset must be 1-15");
YYERROR;
}
if ($4 < 0 || $4 >= 16 || $2 + $4 > 16) {
yyerror("Invalid MAP-E PSID length");
YYERROR;
} else if ($4 == 0) {
yyerror("PSID Length = 0: this means"
" you do not need MAP-E");
YYERROR;
}
if ($6 < 0 || $6 > 65535) {
yyerror("Invalid MAP-E PSID");
YYERROR;
}
pool_opts.mape.offset = $2;
pool_opts.mape.psidlen = $4;
pool_opts.mape.psid = $6;
}
;
redirection : /* empty */ { $$ = NULL; }
@ -4220,6 +4251,29 @@ natrule : nataction interface af proto fromto tag tagged rtable
r.rpool.proxy_port[1] = 0;
}
/*
 * A non-zero offset means 'map-e-portset' was given in the pool
 * options.  Validate that it is combined only with things it can
 * coexist with, then copy the parameters into the rule's pool.
 */
if ($10.mape.offset) {
	if (r.action != PF_NAT) {
		yyerror("the 'map-e-portset' option is"
		    " only valid with nat rules");
		YYERROR;
	}
	if ($10.staticport) {
		yyerror("the 'map-e-portset' option"
		    " can't be used 'static-port'");
		YYERROR;
	}
	/*
	 * An explicit port range conflicts with MAP-E port
	 * selection.  A range counts as explicit as soon as
	 * EITHER bound differs from PF_NAT_PROXY_PORT_LOW /
	 * PF_NAT_PROXY_PORT_HIGH (presumably the defaults
	 * filled in when no port is given); requiring both
	 * to differ would let a range that changes only one
	 * bound slip through.
	 */
	if (r.rpool.proxy_port[0] !=
	    PF_NAT_PROXY_PORT_LOW ||
	    r.rpool.proxy_port[1] !=
	    PF_NAT_PROXY_PORT_HIGH) {
		yyerror("the 'map-e-portset' option"
		    " can't be used when specifying"
		    " a port range");
		YYERROR;
	}
	r.rpool.mape = $10.mape;
}
expand_rule(&r, $2, $9 == NULL ? NULL : $9->host, $4,
$5.src_os, $5.src.host, $5.src.port, $5.dst.host,
$5.dst.port, 0, 0, 0, "");
@ -5545,6 +5599,7 @@ lookup(char *s)
{ "load", LOAD},
{ "log", LOG},
{ "loginterface", LOGINTERFACE},
{ "map-e-portset", MAPEPORTSET},
{ "max", MAXIMUM},
{ "max-mss", MAXMSS},
{ "max-src-conn", MAXSRCCONN},

@ -486,6 +486,9 @@ print_pool(struct pfctl_pool *pool, u_int16_t p1, u_int16_t p2,
printf(" sticky-address");
if (id == PF_NAT && p1 == 0 && p2 == 0)
printf(" static-port");
if (pool->mape.offset > 0)
printf(" map-e-portset %u/%u/%u",
pool->mape.offset, pool->mape.psidlen, pool->mape.psid);
}
const char * const pf_reasons[PFRES_MAX+1] = PFRES_NAMES;

@ -1998,6 +1998,27 @@ rules, the
option prevents
.Xr pf 4
from modifying the source port on TCP and UDP packets.
.It Xo Ar map-e-portset Aq Ar psid-offset
.No / Aq Ar psid-len
.No / Aq Ar psid
.Xc
With
.Ar nat
rules, the
.Ar map-e-portset
option enables the source port translation of MAP-E (RFC 7597) Customer Edge.
In order to make the host act as a MAP-E Customer Edge, setting up a tunneling
interface and pass rules for encapsulated packets are required in addition
to the map-e-portset nat rule.
.Pp
For example:
.Bd -literal -offset indent
nat on $gif_mape_if from $int_if:network to any \e
-> $ipv4_mape_src map-e-portset 6/8/0x34
.Ed
.Pp
sets PSID offset 6, PSID length 8, PSID 0x34.
.El
.Pp
Additionally, the
@ -2893,7 +2914,8 @@ nat-rule = [ "no" ] "nat" [ "pass" [ "log" [ "(" logopts ")" ] ] ]
[ "on" ifspec ] [ af ]
[ protospec ] hosts [ "tag" string ] [ "tagged" string ]
[ "-\*(Gt" ( redirhost | "{" redirhost-list "}" )
[ portspec ] [ pooltype ] [ "static-port" ] ]
[ portspec ] [ pooltype ] [ "static-port" ]
[ "map-e-portset" number "/" number "/" number ] ]
binat-rule = [ "no" ] "binat" [ "pass" [ "log" [ "(" logopts ")" ] ] ]
[ "on" interface-name ] [ af ]

@ -309,6 +309,7 @@ struct pf_kpool {
struct pf_kpooladdr *cur;
struct pf_poolhashkey key;
struct pf_addr counter;
struct pf_mape_portset mape;
int tblidx;
u_int16_t proxy_port[2];
u_int8_t opts;

@ -317,6 +317,12 @@ struct pf_poolhashkey {
#define key32 pfk.key32
};
/*
 * MAP-E (RFC 7597) port-set parameters attached to a NAT address pool.
 * A zero offset is treated as "MAP-E port selection not configured" by
 * the code that consumes this structure.
 */
struct pf_mape_portset {
/* Number of leading port bits that precede the PSID (the parser accepts 1-15). */
u_int8_t offset;
/* Width of the PSID in bits. */
u_int8_t psidlen;
/* Port-set ID; only the low psidlen bits are used when building ports. */
u_int16_t psid;
};
struct pf_pool {
struct pf_palist list;
struct pf_pooladdr *cur;

@ -1644,6 +1644,36 @@ pf_addr_to_nvaddr(const struct pf_addr *paddr)
return (nvl);
}
/*
 * Decode a MAP-E port-set nvlist ("offset", "psidlen", "psid") into
 * *mape.
 *
 * Returns 0 on success, the error reported by the pf_nvuint*() helpers
 * if a field cannot be read, or EINVAL when the values do not describe
 * a layout that fits in a 16-bit port.  *mape is zeroed first, so a
 * failed decode never leaves stale data in the fields not yet read.
 */
static int
pf_nvmape_to_mape(const nvlist_t *nvl, struct pf_mape_portset *mape)
{
	int error = 0;

	bzero(mape, sizeof(*mape));
	PFNV_CHK(pf_nvuint8(nvl, "offset", &mape->offset));
	PFNV_CHK(pf_nvuint8(nvl, "psidlen", &mape->psidlen));
	PFNV_CHK(pf_nvuint16(nvl, "psid", &mape->psid));

	/*
	 * The port allocator shifts by (16 - offset) and by
	 * (16 - offset - psidlen) and iterates a 16-bit counter up to
	 * (1 << offset) - 1.  An offset of 16 or more, or a PSID that
	 * does not fit in the remaining bits, would produce negative
	 * shift counts or a loop that never terminates, so refuse such
	 * input here.  offset == 0 (MAP-E disabled) remains valid.
	 */
	if (mape->offset >= 16 || mape->psidlen > 16 - mape->offset)
		error = EINVAL;

errout:
	return (error);
}
/*
 * Build a freshly allocated nvlist describing *mape, with the numeric
 * entries "offset", "psidlen" and "psid".
 *
 * Returns the new nvlist, or NULL if it could not be created.
 */
static nvlist_t *
pf_mape_to_nvmape(const struct pf_mape_portset *mape)
{
	nvlist_t *out = nvlist_create(0);

	if (out != NULL) {
		nvlist_add_number(out, "offset", mape->offset);
		nvlist_add_number(out, "psidlen", mape->psidlen);
		nvlist_add_number(out, "psid", mape->psid);
	}

	return (out);
}
static int
pf_nvpool_to_pool(const nvlist_t *nvl, struct pf_kpool *kpool)
{
@ -1663,6 +1693,11 @@ pf_nvpool_to_pool(const nvlist_t *nvl, struct pf_kpool *kpool)
NULL));
PFNV_CHK(pf_nvuint8(nvl, "opts", &kpool->opts));
if (nvlist_exists_nvlist(nvl, "mape")) {
PFNV_CHK(pf_nvmape_to_mape(nvlist_get_nvlist(nvl, "mape"),
&kpool->mape));
}
errout:
return (error);
}
@ -1687,6 +1722,11 @@ pf_pool_to_nvpool(const struct pf_kpool *pool)
pf_uint16_array_nv(nvl, "proxy_port", pool->proxy_port, 2);
nvlist_add_number(nvl, "opts", pool->opts);
tmp = pf_mape_to_nvmape(&pool->mape);
if (tmp == NULL)
	goto error;
/*
 * nvlist_add_nvlist() inserts a copy, so the temporary list remains
 * ours and has to be freed once it has been added.
 */
nvlist_add_nvlist(nvl, "mape", tmp);
nvlist_destroy(tmp);
return (nvl);
error:

@ -224,11 +224,6 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r,
if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
return (1);
if (proto == IPPROTO_ICMP) {
low = 1;
high = 65535;
}
bzero(&key, sizeof(key));
key.af = af;
key.proto = proto;
@ -310,6 +305,42 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r,
return (1); /* none available */
}
/*
 * Pick a NAT source port that belongs to the rule's MAP-E port set.
 *
 * A port is laid out as | A (offset bits) | PSID (psidlen bits) | rest |.
 * For a given non-zero A the usable ports form one contiguous range with
 * the PSID bits fixed; each such range is handed to pf_get_sport() until
 * one of them yields a port.  The search starts at a random A, walks up
 * to the largest A, then walks down from just below the starting point
 * to 1.  A == 0 is never tried.
 *
 * Returns 0 when pf_get_sport() succeeded for some range (naddr/nport are
 * filled in by it), 1 when every range failed.
 */
static int
pf_get_mape_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r,
    struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr,
    uint16_t dport, struct pf_addr *naddr, uint16_t *nport,
    struct pf_ksrc_node **sn)
{
	const struct pf_mape_portset *mape = &r->rpool.mape;
	uint16_t psid_bits, tail_mask, a_max, a_first, a, base;
	int a_shift, psid_shift;

	/* Bit positions of the A field and of the PSID field. */
	a_shift = 16 - mape->offset;
	psid_shift = a_shift - mape->psidlen;

	/* PSID trimmed to psidlen bits, moved into its place in the port. */
	psid_bits = mape->psid & ((1U << mape->psidlen) - 1);
	psid_bits = psid_bits << psid_shift;

	/* Bits below the PSID are free to vary within one range. */
	tail_mask = (1U << psid_shift) - 1;
	a_max = (1U << mape->offset) - 1;

	/* Random starting value for A, never 0. */
	a_first = arc4random() & a_max;
	if (a_first == 0)
		a_first = 1;

	/* Upwards from the starting point ... */
	a = a_first;
	while (a <= a_max) {
		base = (a << a_shift) | psid_bits;
		if (pf_get_sport(af, proto, r, saddr, sport, daddr, dport,
		    naddr, nport, base, base | tail_mask, sn) == 0)
			return (0);
		a++;
	}

	/* ... then downwards through the values skipped above. */
	a = a_first - 1;
	while (a > 0) {
		base = (a << a_shift) | psid_bits;
		if (pf_get_sport(af, proto, r, saddr, sport, daddr, dport,
		    naddr, nport, base, base | tail_mask, sn) == 0)
			return (0);
		a--;
	}

	return (1);
}
int
pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_ksrc_node **sn)
@ -530,6 +561,7 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
struct pf_krule *r = NULL;
struct pf_addr *naddr;
uint16_t *nport;
uint16_t low, high;
PF_RULES_RASSERT();
KASSERT(*skp == NULL, ("*skp not NULL"));
@ -577,9 +609,26 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
switch (r->action) {
case PF_NAT:
if (pf_get_sport(pd->af, pd->proto, r, saddr, sport, daddr,
dport, naddr, nport, r->rpool.proxy_port[0],
r->rpool.proxy_port[1], sn)) {
if (pd->proto == IPPROTO_ICMP) {
low = 1;
high = 65535;
} else {
low = r->rpool.proxy_port[0];
high = r->rpool.proxy_port[1];
}
if (r->rpool.mape.offset > 0) {
if (pf_get_mape_sport(pd->af, pd->proto, r, saddr,
sport, daddr, dport, naddr, nport, sn)) {
DPFPRINTF(PF_DEBUG_MISC,
("pf: MAP-E port allocation (%u/%u/%u)"
" failed\n",
r->rpool.mape.offset,
r->rpool.mape.psidlen,
r->rpool.mape.psid));
goto notrans;
}
} else if (pf_get_sport(pd->af, pd->proto, r, saddr, sport,
daddr, dport, naddr, nport, low, high, sn)) {
DPFPRINTF(PF_DEBUG_MISC,
("pf: NAT proxy port allocation (%u-%u) failed\n",
r->rpool.proxy_port[0], r->rpool.proxy_port[1]));

@ -12,6 +12,7 @@ ATF_TESTS_SH+= altq \
forward \
fragmentation \
icmp \
map_e \
names \
nat \
pass_block \

@ -0,0 +1,91 @@
# $FreeBSD$
#
# SPDX-License-Identifier: BSD-2-Clause-FreeBSD
#
# Copyright (c) 2021 KUROSAWA Takahiro <takahiro.kurosawa@gmail.com>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
. $(atf_get_srcdir)/utils.subr
atf_test_case "map_e" "cleanup"
# ATF head for the map_e test case: sets the description and requires
# the test to be run as root.
map_e_head()
{
atf_set descr 'map-e-portset test'
atf_set require.user root
}
# Body of the map_e test case.
#
# Topology: host <-> jail "map_e" (forwards and NATs) <-> jail "echo".
# The map_e jail NATs traffic leaving towards the echo jail with
# "map-e-portset 2/12/0x342"; the echo jail only accepts TCP connections
# to port 7 whose source port lies in one of three 4-port ranges.  Every
# connection attempt from the host must therefore be given a source port
# from the MAP-E port set, otherwise nc fails and so does the test.
map_e_body()
{
# Number of connections to attempt; equals the total number of source
# ports allowed by the three pass rules below (3 ranges x 4 ports).
NC_TRY_COUNT=12
pft_init
epair_map_e=$(vnet_mkepair)
epair_echo=$(vnet_mkepair)
vnet_mkjail map_e ${epair_map_e}b ${epair_echo}a
vnet_mkjail echo ${epair_echo}b
# Host side: reach 198.51.100.0/24 through the map_e jail.
ifconfig ${epair_map_e}a 192.0.2.2/24 up
route add -net 198.51.100.0/24 192.0.2.1
jexec map_e ifconfig ${epair_map_e}b 192.0.2.1/24 up
jexec map_e ifconfig ${epair_echo}a 198.51.100.1/24 up
jexec map_e sysctl net.inet.ip.forwarding=1
jexec echo ifconfig ${epair_echo}b 198.51.100.2/24 up
# Presumably echo_inetd.conf provides the TCP echo service on port 7
# that nc talks to below -- confirm against that file.
jexec echo /usr/sbin/inetd -p inetd-echo.pid $(atf_get_srcdir)/echo_inetd.conf
# Enable pf!
jexec map_e pfctl -e
pft_set_rules map_e \
"nat pass on ${epair_echo}a inet from 192.0.2.0/24 to any -> (${epair_echo}a) map-e-portset 2/12/0x342"
# Only allow specified ports.
# With offset 2 and PSID length 12 the PSID sits at bits 2-13 of the
# port: 0x342 << 2 = 3336.  The two leading bits take the values 1, 2
# and 3, giving bases 16384, 32768 and 49152, and the two trailing bits
# give 4 ports per base: 19720-19723, 36104-36107 and 52488-52491.
jexec echo pfctl -e
pft_set_rules echo "block return all" \
"pass in on ${epair_echo}b inet proto tcp from 198.51.100.1 port 19720:19723 to (${epair_echo}b) port 7" \
"pass in on ${epair_echo}b inet proto tcp from 198.51.100.1 port 36104:36107 to (${epair_echo}b) port 7" \
"pass in on ${epair_echo}b inet proto tcp from 198.51.100.1 port 52488:52491 to (${epair_echo}b) port 7"
# Open NC_TRY_COUNT connections from the host; any failure (including
# the 2 second timeout) fails the test with the attempt number.
i=0
while [ ${i} -lt ${NC_TRY_COUNT} ]
do
echo "foo ${i}" | timeout 2 nc -N 198.51.100.2 7
if [ $? -ne 0 ]; then
atf_fail "nc failed (${i})"
fi
i=$((${i}+1))
done
}
# Cleanup for the map_e test case: removes the pid file written by the
# inetd instance started in the body, then runs the shared pft_cleanup.
map_e_cleanup()
{
rm -f inetd-echo.pid
pft_cleanup
}
# Registers the test cases provided by this file with ATF.
atf_init_test_cases()
{
atf_add_test_case "map_e"
}