Reapply r345274 with build fixes for 32-bit architectures.

Update NAT64LSN implementation:

  o most of the data structures and their relations were modified to be able
    to support a large number of translation states. Now each supported
    protocol can use the full ports range. Ports groups now belong to IPv4
    alias addresses, not hosts. Each ports group can keep several states
    chunks. This is controlled with the new `states_chunks` config option.
    States chunks allow having several translation states for a single alias
    address and port, but for different destination addresses.
  o by default all hash tables now use jenkins hash.
  o ConcurrencyKit and epoch(9) are used to make NAT64LSN lockless on the fast path.
  o one NAT64LSN instance now can be used to handle several IPv6 prefixes,
    special prefix "::" value should be used for this purpose when instance
    is created.
  o due to modified internal data structures relations, the socket opcode
    that does states listing was changed.

Obtained from:	Yandex LLC
MFC after:	1 month
Sponsored by:	Yandex LLC
This commit is contained in:
ae 2019-03-19 10:57:03 +00:00
parent 670da7d638
commit d763427450
9 changed files with 1876 additions and 1929 deletions

View File

@ -1,7 +1,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd March 18, 2019
.Dd March 19, 2019
.Dt IPFW 8
.Os
.Sh NAME
@ -3300,6 +3300,7 @@ See
.Sx SYSCTL VARIABLES
for more info.
.Sh IPv6/IPv4 NETWORK ADDRESS AND PROTOCOL TRANSLATION
.Ss Stateful translation
.Nm
supports in-kernel IPv6/IPv4 network address and protocol translation.
Stateful NAT64 translation allows IPv6-only clients to contact IPv4 servers
@ -3317,7 +3318,8 @@ to be able use stateful NAT64 translator.
Stateful NAT64 uses a bunch of memory for several types of objects.
When IPv6 client initiates connection, NAT64 translator creates a host entry
in the states table.
Each host entry has a number of ports group entries allocated on demand.
Each host entry uses preallocated IPv4 alias entry.
Each alias entry has a number of ports group entries allocated on demand.
Ports group entries contain connection state entries.
There are several options to control limits and lifetime for these objects.
.Pp
@ -3337,6 +3339,11 @@ First time an original packet is handled and consumed by translator,
and then it is handled again as translated packet.
This behavior can be changed by sysctl variable
.Va net.inet.ip.fw.nat64_direct_output .
Also, a translated packet can be tagged using the
.Cm tag
rule action, and then matched by the
.Cm tagged
opcode to avoid loops and extra overhead.
.Pp
The stateful NAT64 configuration command is the following:
.Bd -ragged -offset indent
@ -3364,15 +3371,16 @@ to represent IPv4 addresses. This IPv6 prefix should be configured in DNS64.
The translator implementation follows RFC6052, that restricts the length of
prefixes to one of following: 32, 40, 48, 56, 64, or 96.
The Well-Known IPv6 Prefix 64:ff9b:: must be 96 bits long.
.It Cm max_ports Ar number
Maximum number of ports reserved for upper level protocols to one IPv6 client.
All reserved ports are divided into chunks between supported protocols.
The number of connections from one IPv6 client is limited by this option.
Note that closed TCP connections still remain in the list of connections until
.Cm tcp_close_age
interval will not expire.
Default value is
.Ar 2048 .
The special
.Ar ::/length
prefix can be used to handle several IPv6 prefixes with one NAT64 instance.
The NAT64 instance will determine a destination IPv4 address from prefix
.Ar length .
.It Cm states_chunks Ar number
The number of states chunks in a single ports group.
By default, each ports group can keep 64 state entries in a single chunk.
The above value affects the maximum number of states that can be associated
with a single IPv4 alias address and port.
The value must be a power of 2, and at most 128.
.It Cm host_del_age Ar seconds
The number of seconds until the host entry for an IPv6 client will be deleted
and all its resources will be released due to inactivity.

View File

@ -278,6 +278,7 @@ enum tokens {
TOK_AGG_LEN,
TOK_AGG_COUNT,
TOK_MAX_PORTS,
TOK_STATES_CHUNKS,
TOK_JMAXLEN,
TOK_PORT_RANGE,
TOK_HOST_DEL_AGE,

View File

@ -87,68 +87,70 @@ nat64lsn_print_states(void *buf)
char sflags[4], *sf, *proto;
ipfw_obj_header *oh;
ipfw_obj_data *od;
ipfw_nat64lsn_stg *stg;
ipfw_nat64lsn_state *ste;
ipfw_nat64lsn_stg_v1 *stg;
ipfw_nat64lsn_state_v1 *ste;
uint64_t next_idx;
int i, sz;
oh = (ipfw_obj_header *)buf;
od = (ipfw_obj_data *)(oh + 1);
stg = (ipfw_nat64lsn_stg *)(od + 1);
stg = (ipfw_nat64lsn_stg_v1 *)(od + 1);
sz = od->head.length - sizeof(*od);
next_idx = 0;
while (sz > 0 && next_idx != 0xFF) {
next_idx = stg->next_idx;
next_idx = stg->next.index;
sz -= sizeof(*stg);
if (stg->count == 0) {
stg++;
continue;
}
switch (stg->proto) {
case IPPROTO_TCP:
proto = "TCP";
break;
case IPPROTO_UDP:
proto = "UDP";
break;
case IPPROTO_ICMPV6:
proto = "ICMPv6";
break;
}
inet_ntop(AF_INET6, &stg->host6, s, sizeof(s));
/*
* NOTE: addresses are in network byte order,
* ports are in host byte order.
*/
inet_ntop(AF_INET, &stg->alias4, a, sizeof(a));
ste = (ipfw_nat64lsn_state *)(stg + 1);
ste = (ipfw_nat64lsn_state_v1 *)(stg + 1);
for (i = 0; i < stg->count && sz > 0; i++) {
sf = sflags;
inet_ntop(AF_INET6, &ste->host6, s, sizeof(s));
inet_ntop(AF_INET, &ste->daddr, f, sizeof(f));
if (stg->proto == IPPROTO_TCP) {
switch (ste->proto) {
case IPPROTO_TCP:
proto = "TCP";
if (ste->flags & 0x02)
*sf++ = 'S';
if (ste->flags & 0x04)
*sf++ = 'E';
if (ste->flags & 0x01)
*sf++ = 'F';
break;
case IPPROTO_UDP:
proto = "UDP";
break;
case IPPROTO_ICMP:
proto = "ICMPv6";
break;
}
*sf = '\0';
switch (stg->proto) {
switch (ste->proto) {
case IPPROTO_TCP:
case IPPROTO_UDP:
printf("%s:%d\t%s:%d\t%s\t%s\t%d\t%s:%d\n",
s, ste->sport, a, ste->aport, proto,
sflags, ste->idle, f, ste->dport);
break;
case IPPROTO_ICMPV6:
case IPPROTO_ICMP:
printf("%s\t%s\t%s\t\t%d\t%s\n",
s, a, proto, ste->idle, f);
break;
default:
printf("%s\t%s\t%d\t\t%d\t%s\n",
s, a, stg->proto, ste->idle, f);
s, a, ste->proto, ste->idle, f);
}
ste++;
sz -= sizeof(*ste);
}
stg = (ipfw_nat64lsn_stg *)ste;
stg = (ipfw_nat64lsn_stg_v1 *)ste;
}
return (next_idx);
}
@ -174,6 +176,7 @@ nat64lsn_states_cb(ipfw_nat64lsn_cfg *cfg, const char *name, uint8_t set)
err(EX_OSERR, NULL);
do {
oh = (ipfw_obj_header *)buf;
oh->opheader.version = 1; /* Force using of new API */
od = (ipfw_obj_data *)(oh + 1);
nat64lsn_fill_ntlv(&oh->ntlv, cfg->name, set);
od->head.type = IPFW_TLV_OBJDATA;
@ -363,12 +366,8 @@ nat64lsn_parse_int(const char *arg, const char *desc)
static struct _s_x nat64newcmds[] = {
{ "prefix6", TOK_PREFIX6 },
{ "agg_len", TOK_AGG_LEN }, /* not yet */
{ "agg_count", TOK_AGG_COUNT }, /* not yet */
{ "port_range", TOK_PORT_RANGE }, /* not yet */
{ "jmaxlen", TOK_JMAXLEN },
{ "prefix4", TOK_PREFIX4 },
{ "max_ports", TOK_MAX_PORTS },
{ "host_del_age", TOK_HOST_DEL_AGE },
{ "pg_del_age", TOK_PG_DEL_AGE },
{ "tcp_syn_age", TOK_TCP_SYN_AGE },
@ -376,10 +375,13 @@ static struct _s_x nat64newcmds[] = {
{ "tcp_est_age", TOK_TCP_EST_AGE },
{ "udp_age", TOK_UDP_AGE },
{ "icmp_age", TOK_ICMP_AGE },
{ "states_chunks",TOK_STATES_CHUNKS },
{ "log", TOK_LOG },
{ "-log", TOK_LOGOFF },
{ "allow_private", TOK_PRIVATE },
{ "-allow_private", TOK_PRIVATEOFF },
/* for compatibility with old configurations */
{ "max_ports", TOK_MAX_PORTS }, /* unused */
{ NULL, 0 }
};
@ -436,34 +438,10 @@ nat64lsn_create(const char *name, uint8_t set, int ac, char **av)
nat64lsn_parse_prefix(*av, AF_INET6, &cfg->prefix6,
&cfg->plen6);
if (ipfw_check_nat64prefix(&cfg->prefix6,
cfg->plen6) != 0)
cfg->plen6) != 0 &&
!IN6_IS_ADDR_UNSPECIFIED(&cfg->prefix6))
errx(EX_USAGE, "Bad prefix6 %s", *av);
ac--; av++;
break;
#if 0
case TOK_AGG_LEN:
NEED1("Aggregation prefix len required");
cfg->agg_prefix_len = nat64lsn_parse_int(*av, opt);
ac--; av++;
break;
case TOK_AGG_COUNT:
NEED1("Max per-prefix count required");
cfg->agg_prefix_max = nat64lsn_parse_int(*av, opt);
ac--; av++;
break;
case TOK_PORT_RANGE:
NEED1("port range x[:y] required");
if ((p = strchr(*av, ':')) == NULL)
cfg->min_port = (uint16_t)nat64lsn_parse_int(
*av, opt);
else {
*p++ = '\0';
cfg->min_port = (uint16_t)nat64lsn_parse_int(
*av, opt);
cfg->max_port = (uint16_t)nat64lsn_parse_int(
p, opt);
}
ac--; av++;
break;
case TOK_JMAXLEN:
@ -471,7 +449,6 @@ nat64lsn_create(const char *name, uint8_t set, int ac, char **av)
cfg->jmaxlen = nat64lsn_parse_int(*av, opt);
ac--; av++;
break;
#endif
case TOK_MAX_PORTS:
NEED1("Max per-user ports required");
cfg->max_ports = nat64lsn_parse_int(*av, opt);
@ -519,6 +496,12 @@ nat64lsn_create(const char *name, uint8_t set, int ac, char **av)
*av, opt);
ac--; av++;
break;
case TOK_STATES_CHUNKS:
NEED1("number of chunks required");
cfg->states_chunks = (uint8_t)nat64lsn_parse_int(
*av, opt);
ac--; av++;
break;
case TOK_LOG:
cfg->flags |= NAT64_LOG;
break;
@ -630,6 +613,12 @@ nat64lsn_config(const char *name, uint8_t set, int ac, char **av)
*av, opt);
ac--; av++;
break;
case TOK_STATES_CHUNKS:
NEED1("number of chunks required");
cfg->states_chunks = (uint8_t)nat64lsn_parse_int(
*av, opt);
ac--; av++;
break;
case TOK_LOG:
cfg->flags |= NAT64_LOG;
break;
@ -789,31 +778,24 @@ nat64lsn_show_cb(ipfw_nat64lsn_cfg *cfg, const char *name, uint8_t set)
printf("nat64lsn %s prefix4 %s/%u", cfg->name, abuf, cfg->plen4);
inet_ntop(AF_INET6, &cfg->prefix6, abuf, sizeof(abuf));
printf(" prefix6 %s/%u", abuf, cfg->plen6);
#if 0
printf("agg_len %u agg_count %u ", cfg->agg_prefix_len,
cfg->agg_prefix_max);
if (cfg->min_port != NAT64LSN_PORT_MIN ||
cfg->max_port != NAT64LSN_PORT_MAX)
printf(" port_range %u:%u", cfg->min_port, cfg->max_port);
if (cfg->jmaxlen != NAT64LSN_JMAXLEN)
printf(" jmaxlen %u ", cfg->jmaxlen);
#endif
if (cfg->max_ports != NAT64LSN_MAX_PORTS)
printf(" max_ports %u", cfg->max_ports);
if (cfg->nh_delete_delay != NAT64LSN_HOST_AGE)
if (co.verbose || cfg->states_chunks > 1)
printf(" states_chunks %u", cfg->states_chunks);
if (co.verbose || cfg->nh_delete_delay != NAT64LSN_HOST_AGE)
printf(" host_del_age %u", cfg->nh_delete_delay);
if (cfg->pg_delete_delay != NAT64LSN_PG_AGE)
if (co.verbose || cfg->pg_delete_delay != NAT64LSN_PG_AGE)
printf(" pg_del_age %u ", cfg->pg_delete_delay);
if (cfg->st_syn_ttl != NAT64LSN_TCP_SYN_AGE)
if (co.verbose || cfg->st_syn_ttl != NAT64LSN_TCP_SYN_AGE)
printf(" tcp_syn_age %u", cfg->st_syn_ttl);
if (cfg->st_close_ttl != NAT64LSN_TCP_FIN_AGE)
if (co.verbose || cfg->st_close_ttl != NAT64LSN_TCP_FIN_AGE)
printf(" tcp_close_age %u", cfg->st_close_ttl);
if (cfg->st_estab_ttl != NAT64LSN_TCP_EST_AGE)
if (co.verbose || cfg->st_estab_ttl != NAT64LSN_TCP_EST_AGE)
printf(" tcp_est_age %u", cfg->st_estab_ttl);
if (cfg->st_udp_ttl != NAT64LSN_UDP_AGE)
if (co.verbose || cfg->st_udp_ttl != NAT64LSN_UDP_AGE)
printf(" udp_age %u", cfg->st_udp_ttl);
if (cfg->st_icmp_ttl != NAT64LSN_ICMP_AGE)
if (co.verbose || cfg->st_icmp_ttl != NAT64LSN_ICMP_AGE)
printf(" icmp_age %u", cfg->st_icmp_ttl);
if (co.verbose || cfg->jmaxlen != NAT64LSN_JMAXLEN)
printf(" jmaxlen %u ", cfg->jmaxlen);
if (cfg->flags & NAT64_LOG)
printf(" log");
if (cfg->flags & NAT64_ALLOW_PRIVATE)

View File

@ -4398,9 +4398,9 @@ netpfil/ipfw/nat64/nat64clat.c optional inet inet6 ipfirewall \
netpfil/ipfw/nat64/nat64clat_control.c optional inet inet6 ipfirewall \
ipfirewall_nat64
netpfil/ipfw/nat64/nat64lsn.c optional inet inet6 ipfirewall \
ipfirewall_nat64
ipfirewall_nat64 compile-with "${NORMAL_C} -I$S/contrib/ck/include"
netpfil/ipfw/nat64/nat64lsn_control.c optional inet inet6 ipfirewall \
ipfirewall_nat64
ipfirewall_nat64 compile-with "${NORMAL_C} -I$S/contrib/ck/include"
netpfil/ipfw/nat64/nat64stl.c optional inet inet6 ipfirewall \
ipfirewall_nat64
netpfil/ipfw/nat64/nat64stl_control.c optional inet inet6 ipfirewall \

View File

@ -8,4 +8,6 @@ SRCS+= nat64clat.c nat64clat_control.c
SRCS+= nat64lsn.c nat64lsn_control.c
SRCS+= nat64stl.c nat64stl_control.c
CFLAGS+= -I${SRCTOP}/sys/contrib/ck/include
.include <bsd.kmod.mk>

View File

@ -122,7 +122,7 @@ typedef struct _ipfw_nat64clat_cfg {
/*
* NAT64LSN default configuration values
*/
#define NAT64LSN_MAX_PORTS 2048 /* Max number of ports per host */
#define NAT64LSN_MAX_PORTS 2048 /* Unused */
#define NAT64LSN_JMAXLEN 2048 /* Max outstanding requests. */
#define NAT64LSN_TCP_SYN_AGE 10 /* State's TTL after SYN received. */
#define NAT64LSN_TCP_EST_AGE (2 * 3600) /* TTL for established connection */
@ -135,16 +135,20 @@ typedef struct _ipfw_nat64clat_cfg {
typedef struct _ipfw_nat64lsn_cfg {
char name[64]; /* NAT name */
uint32_t flags;
uint32_t max_ports; /* Max ports per client */
uint32_t agg_prefix_len; /* Prefix length to count */
uint32_t agg_prefix_max; /* Max hosts per agg prefix */
uint32_t max_ports; /* Unused */
uint32_t agg_prefix_len; /* Unused */
uint32_t agg_prefix_max; /* Unused */
struct in_addr prefix4;
uint16_t plen4; /* Prefix length */
uint16_t plen6; /* Prefix length */
struct in6_addr prefix6; /* NAT64 prefix */
uint32_t jmaxlen; /* Max jobqueue length */
uint16_t min_port; /* Min port group # to use */
uint16_t max_port; /* Max port group # to use */
uint16_t min_port; /* Unused */
uint16_t max_port; /* Unused */
uint16_t nh_delete_delay;/* Stale host delete delay */
uint16_t pg_delete_delay;/* Stale portgroup delete delay */
uint16_t st_syn_ttl; /* TCP syn expire */
@ -153,7 +157,7 @@ typedef struct _ipfw_nat64lsn_cfg {
uint16_t st_udp_ttl; /* UDP expire */
uint16_t st_icmp_ttl; /* ICMP expire */
uint8_t set; /* Named instance set [0..31] */
uint8_t spare;
uint8_t states_chunks; /* Number of states chunks per PG */
} ipfw_nat64lsn_cfg;
typedef struct _ipfw_nat64lsn_state {
@ -177,5 +181,30 @@ typedef struct _ipfw_nat64lsn_stg {
uint32_t spare2;
} ipfw_nat64lsn_stg;
#endif /* _NETINET6_IP_FW_NAT64_H_ */
/*
 * Version 1 export format of a single translation state.
 * nat64lsn_print_states() reads an array of these records placed directly
 * after an ipfw_nat64lsn_stg_v1 header; it treats the addresses as being in
 * network byte order and the ports as being in host byte order.
 */
typedef struct _ipfw_nat64lsn_state_v1 {
struct in6_addr host6; /* Bound IPv6 host */
struct in_addr daddr; /* Remote IPv4 address */
uint16_t dport; /* Remote destination port */
uint16_t aport; /* Local alias port */
uint16_t sport; /* Source port */
uint16_t spare; /* Not read by the states printer; presumably padding */
uint16_t idle; /* Last used time */
uint8_t flags; /* State flags; for TCP the printer shows 0x02 as 'S', 0x04 as 'E', 0x01 as 'F' */
uint8_t proto; /* protocol */
} ipfw_nat64lsn_state_v1;
/*
 * Version 1 header of a group of exported states.
 * nat64lsn_print_states() expects `count` ipfw_nat64lsn_state_v1 records
 * immediately after this header, followed by the next header. It returns
 * next.index to its caller and stops walking the buffer once that value
 * equals 0xFF.
 */
typedef struct _ipfw_nat64lsn_stg_v1 {
union nat64lsn_pgidx {
uint64_t index; /* all fields below viewed as one 64-bit value */
struct {
uint8_t chunk; /* states chunk */
uint8_t proto; /* protocol */
uint16_t port; /* base port */
in_addr_t addr; /* alias address */
};
} next; /* next state index */
struct in_addr alias4; /* IPv4 alias address */
uint32_t count; /* Number of states */
} ipfw_nat64lsn_stg_v1;
#endif /* _NETINET6_IP_FW_NAT64_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -35,75 +35,149 @@
#include "ip_fw_nat64.h"
#include "nat64_translate.h"
#define NAT64_CHUNK_SIZE_BITS 6 /* 64 ports */
#define NAT64_CHUNK_SIZE (1 << NAT64_CHUNK_SIZE_BITS)
#define NAT64_MIN_PORT 1024
#define NAT64_MIN_CHUNK (NAT64_MIN_PORT >> NAT64_CHUNK_SIZE_BITS)
struct nat64lsn_host;
struct nat64lsn_alias;
struct st_ptr {
uint8_t idx; /* index in nh->pg_ptr array.
* NOTE: it starts from 1.
*/
uint8_t off;
/*
 * Kernel-side translation state. States are linked into CK singly-linked
 * lists (struct nat64lsn_states_slist) and stored in arrays of 64 inside
 * struct nat64lsn_states_chunk.
 */
struct nat64lsn_state {
/* IPv6 host entry keeps hash table to speedup state lookup */
CK_SLIST_ENTRY(nat64lsn_state) entries;
struct nat64lsn_host *host; /* presumably the IPv6 host owning this state -- confirm */
struct in6_addr ip6_dst; /* Destination IPv6 address */
in_addr_t ip_src; /* Alias IPv4 address */
in_addr_t ip_dst; /* Destination IPv4 address */
uint16_t dport; /* Destination port */
uint16_t sport; /* Source port */
uint32_t hval; /* presumably the cached hash value -- confirm */
uint32_t flags; /* Internal flags */
uint16_t aport; /* presumably the alias port -- confirm */
uint16_t timestamp; /* last used */
uint8_t proto; /* protocol; encoding not visible here -- confirm */
uint8_t _spare[7]; /* explicit padding */
};
#define NAT64LSN_MAXPGPTR ((1 << (sizeof(uint8_t) * NBBY)) - 1)
#define NAT64LSN_PGPTRMASKBITS (sizeof(uint64_t) * NBBY)
#define NAT64LSN_PGPTRNMASK (roundup(NAT64LSN_MAXPGPTR, \
NAT64LSN_PGPTRMASKBITS) / NAT64LSN_PGPTRMASKBITS)
struct nat64lsn_portgroup;
/* sizeof(struct nat64lsn_host) = 64 + 64x2 + 8x8 = 256 bytes */
struct nat64lsn_states_chunk {
struct nat64lsn_state state[64];
};
#define ISSET64(mask, bit) ((mask) & ((uint64_t)1 << (bit)))
#define ISSET32(mask, bit) ((mask) & ((uint32_t)1 << (bit)))
/*
 * Ports group (PG). Linked into a CK singly-linked list
 * (struct nat64lsn_pg_slist) and referenced from struct nat64lsn_pgchunk.
 */
struct nat64lsn_pg {
CK_SLIST_ENTRY(nat64lsn_pg) entries;
uint16_t base_port; /* first port of the group (exported as "base port") */
uint16_t timestamp;
uint8_t proto;
uint8_t chunks_count; /* number of states chunks; selects the union members below */
uint8_t spare[2];
/*
 * Free masks, accessed through FREEMASK_CHUNK(): when chunks_count == 1
 * the mask is stored inline (freemask64, or freemask32[] on non-LP64);
 * otherwise the *_chunk pointers reference an array with one 64-bit mask
 * per chunk.
 */
union {
uint64_t freemask64;
uint32_t freemask32[2];
uint64_t *freemask64_chunk;
uint32_t *freemask32_chunk;
void *freemask_chunk;
};
/*
 * States storage. NOTE(review): presumably `states` is used for a single
 * chunk and `states_chunk` for several, mirroring the free masks -- confirm
 * against the users of this struct.
 */
union {
struct nat64lsn_states_chunk *states;
struct nat64lsn_states_chunk **states_chunk;
};
};
/*
 * Pick a states chunk index for value `a` by masking it with
 * chunks_count - 1. This only distributes over all chunks when
 * chunks_count is a power of 2.
 */
#define CHUNK_BY_FADDR(p, a) ((a) & ((p)->chunks_count - 1))
/*
 * FREEMASK_CHUNK(p, v) yields a pointer to the free mask to use for value
 * `v`: the mask embedded in the PG when it has exactly one chunk, otherwise
 * the entry selected by CHUNK_BY_FADDR in the separately referenced array.
 * FREEMASK_BITCOUNT(pg, faddr) counts the bits set in that 64-bit mask.
 */
#ifdef __LP64__
#define FREEMASK_CHUNK(p, v) \
((p)->chunks_count == 1 ? &(p)->freemask64 : \
&(p)->freemask64_chunk[CHUNK_BY_FADDR(p, v)])
#define FREEMASK_BITCOUNT(pg, faddr) \
bitcount64(*FREEMASK_CHUNK((pg), (faddr)))
#else
/*
 * Non-LP64: each 64-bit mask is kept as two uint32_t words, hence the
 * index is multiplied by 2 and the pointer is cast back for bitcount64().
 */
#define FREEMASK_CHUNK(p, v) \
((p)->chunks_count == 1 ? &(p)->freemask32[0] : \
&(p)->freemask32_chunk[CHUNK_BY_FADDR(p, v) * 2])
#define FREEMASK_BITCOUNT(pg, faddr) \
bitcount64(*(uint64_t *)FREEMASK_CHUNK((pg), (faddr)))
#endif /* !__LP64__ */
struct nat64lsn_pgchunk {
struct nat64lsn_pg *pgptr[32];
};
struct nat64lsn_aliaslink {
CK_SLIST_ENTRY(nat64lsn_aliaslink) alias_entries;
CK_SLIST_ENTRY(nat64lsn_aliaslink) host_entries;
struct nat64lsn_alias *alias;
};
CK_SLIST_HEAD(nat64lsn_aliaslink_slist, nat64lsn_aliaslink);
CK_SLIST_HEAD(nat64lsn_states_slist, nat64lsn_state);
CK_SLIST_HEAD(nat64lsn_hosts_slist, nat64lsn_host);
CK_SLIST_HEAD(nat64lsn_pg_slist, nat64lsn_pg);
/*
 * Per IPv4 alias address bookkeeping. The instance keeps an array of these
 * (cfg->aliases). Each alias has its own mutex, taken via ALIAS_LOCK() and
 * released via ALIAS_UNLOCK().
 */
struct nat64lsn_alias {
struct nat64lsn_aliaslink_slist hosts; /* list of aliaslink entries */
struct nat64lsn_pg_slist portgroups; /* list of ports groups */
struct mtx lock;
in_addr_t addr; /* host byte order */
uint32_t hosts_count;
uint32_t portgroups_count;
/*
 * Per-protocol masks. export_stats() walks bit j of *_chunkmask and, for
 * each set bit, adds the population count of *_pgmask[j] to the
 * corresponding chunks statistic.
 */
uint32_t tcp_chunkmask;
uint32_t udp_chunkmask;
uint32_t icmp_chunkmask;
uint32_t tcp_pgidx;
uint32_t udp_pgidx;
uint32_t icmp_pgidx;
uint16_t timestamp;
uint16_t spare;
uint32_t tcp_pgmask[32];
uint32_t udp_pgmask[32];
uint32_t icmp_pgmask[32];
/* Up to 32 PG chunks per protocol, each holding 32 PG pointers */
struct nat64lsn_pgchunk *tcp[32];
struct nat64lsn_pgchunk *udp[32];
struct nat64lsn_pgchunk *icmp[32];
/* pointer to PG that can be used for faster state allocation */
struct nat64lsn_pg *tcp_pg;
struct nat64lsn_pg *udp_pg;
struct nat64lsn_pg *icmp_pg;
};
#define ALIAS_LOCK_INIT(p) \
mtx_init(&(p)->lock, "alias_lock", NULL, MTX_DEF)
#define ALIAS_LOCK_DESTROY(p) mtx_destroy(&(p)->lock)
#define ALIAS_LOCK(p) mtx_lock(&(p)->lock)
#define ALIAS_UNLOCK(p) mtx_unlock(&(p)->lock)
#define NAT64LSN_HSIZE 256
#define NAT64LSN_MAX_HSIZE 4096
#define NAT64LSN_HOSTS_HSIZE 1024
struct nat64lsn_host {
struct rwlock h_lock; /* Host states lock */
struct in6_addr addr;
struct nat64lsn_host *next;
uint16_t timestamp; /* Last altered */
uint16_t hsize; /* ports hash size */
uint16_t pg_used; /* Number of portgroups used */
#define NAT64LSN_REMAININGPG 8 /* Number of remaining PG before
* requesting of new chunk of indexes.
*/
uint16_t pg_allocated; /* Number of portgroups indexes
* allocated.
*/
#define NAT64LSN_HSIZE 64
struct st_ptr phash[NAT64LSN_HSIZE]; /* XXX: hardcoded size */
/*
* PG indexes are stored in chunks with 32 elements.
* The maximum count is limited to 255 due to st_ptr->idx is uint8_t.
*/
#define NAT64LSN_PGIDX_CHUNK 32
#define NAT64LSN_PGNIDX (roundup(NAT64LSN_MAXPGPTR, \
NAT64LSN_PGIDX_CHUNK) / NAT64LSN_PGIDX_CHUNK)
struct nat64lsn_portgroup **pg_ptr[NAT64LSN_PGNIDX]; /* PG indexes */
struct in6_addr addr;
struct nat64lsn_aliaslink_slist aliases;
struct nat64lsn_states_slist *states_hash;
CK_SLIST_ENTRY(nat64lsn_host) entries;
uint32_t states_count;
uint32_t hval;
uint32_t flags;
#define NAT64LSN_DEADHOST 1
#define NAT64LSN_GROWHASH 2
uint16_t states_hashsize;
uint16_t timestamp;
struct mtx lock;
};
#define NAT64_RLOCK_ASSERT(h) rw_assert(&(h)->h_lock, RA_RLOCKED)
#define NAT64_WLOCK_ASSERT(h) rw_assert(&(h)->h_lock, RA_WLOCKED)
#define NAT64_RLOCK(h) rw_rlock(&(h)->h_lock)
#define NAT64_RUNLOCK(h) rw_runlock(&(h)->h_lock)
#define NAT64_WLOCK(h) rw_wlock(&(h)->h_lock)
#define NAT64_WUNLOCK(h) rw_wunlock(&(h)->h_lock)
#define NAT64_LOCK(h) NAT64_WLOCK(h)
#define NAT64_UNLOCK(h) NAT64_WUNLOCK(h)
#define NAT64_LOCK_INIT(h) do { \
rw_init(&(h)->h_lock, "NAT64 host lock"); \
} while (0)
#define NAT64_LOCK_DESTROY(h) do { \
rw_destroy(&(h)->h_lock); \
} while (0)
/* Internal proto index */
#define NAT_PROTO_TCP 1
#define NAT_PROTO_UDP 2
#define NAT_PROTO_ICMP 3
#define NAT_MAX_PROTO 4
extern uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO];
#define HOST_LOCK_INIT(p) \
mtx_init(&(p)->lock, "host_lock", NULL, MTX_DEF|MTX_NEW)
#define HOST_LOCK_DESTROY(p) mtx_destroy(&(p)->lock)
#define HOST_LOCK(p) mtx_lock(&(p)->lock)
#define HOST_UNLOCK(p) mtx_unlock(&(p)->lock)
VNET_DECLARE(uint16_t, nat64lsn_eid);
#define V_nat64lsn_eid VNET(nat64lsn_eid)
@ -112,124 +186,65 @@ VNET_DECLARE(uint16_t, nat64lsn_eid);
/* Timestamp macro */
#define _CT ((int)time_uptime % 65536)
#define SET_AGE(x) (x) = _CT
#define GET_AGE(x) ((_CT >= (x)) ? _CT - (x) : \
(int)65536 + _CT - (x))
#define GET_AGE(x) ((_CT >= (x)) ? _CT - (x): (int)65536 + _CT - (x))
#ifdef __LP64__
/* ffsl() is capable of checking 64-bit ints */
#define _FFS64
#endif
/* 16 bytes */
struct nat64lsn_state {
union {
struct {
in_addr_t faddr; /* Remote IPv4 address */
uint16_t fport; /* Remote IPv4 port */
uint16_t lport; /* Local IPv6 port */
}s;
uint64_t hkey;
} u;
uint8_t nat_proto;
uint8_t flags;
uint16_t timestamp;
struct st_ptr cur; /* Index of portgroup in nat64lsn_host */
struct st_ptr next; /* Next entry index */
};
/*
* 1024+32 bytes per 64 states, used to store state
* AND for outside-in state lookup
*/
struct nat64lsn_portgroup {
struct nat64lsn_host *host; /* IPv6 source host info */
in_addr_t aaddr; /* Alias addr, network format */
uint16_t aport; /* Base port */
uint16_t timestamp;
uint8_t nat_proto;
uint8_t spare[3];
uint32_t idx;
#ifdef _FFS64
uint64_t freemask; /* Mask of free entries */
#else
uint32_t freemask[2]; /* Mask of free entries */
#endif
struct nat64lsn_state states[NAT64_CHUNK_SIZE]; /* State storage */
};
#ifdef _FFS64
#define PG_MARK_BUSY_IDX(_pg, _idx) (_pg)->freemask &= ~((uint64_t)1<<(_idx))
#define PG_MARK_FREE_IDX(_pg, _idx) (_pg)->freemask |= ((uint64_t)1<<(_idx))
#define PG_IS_FREE_IDX(_pg, _idx) ((_pg)->freemask & ((uint64_t)1<<(_idx)))
#define PG_IS_BUSY_IDX(_pg, _idx) (PG_IS_FREE_IDX(_pg, _idx) == 0)
#define PG_GET_FREE_IDX(_pg) (ffsll((_pg)->freemask))
#define PG_IS_EMPTY(_pg) (((_pg)->freemask + 1) == 0)
#else
#define PG_MARK_BUSY_IDX(_pg, _idx) \
(_pg)->freemask[(_idx) / 32] &= ~((u_long)1<<((_idx) % 32))
#define PG_MARK_FREE_IDX(_pg, _idx) \
(_pg)->freemask[(_idx) / 32] |= ((u_long)1<<((_idx) % 32))
#define PG_IS_FREE_IDX(_pg, _idx) \
((_pg)->freemask[(_idx) / 32] & ((u_long)1<<((_idx) % 32)))
#define PG_IS_BUSY_IDX(_pg, _idx) (PG_IS_FREE_IDX(_pg, _idx) == 0)
#define PG_GET_FREE_IDX(_pg) _pg_get_free_idx(_pg)
#define PG_IS_EMPTY(_pg) \
((((_pg)->freemask[0] + 1) == 0 && ((_pg)->freemask[1] + 1) == 0))
static inline int
_pg_get_free_idx(const struct nat64lsn_portgroup *pg)
{
int i;
if ((i = ffsl(pg->freemask[0])) != 0)
return (i);
if ((i = ffsl(pg->freemask[1])) != 0)
return (i + 32);
return (0);
}
#endif
TAILQ_HEAD(nat64lsn_job_head, nat64lsn_job_item);
STAILQ_HEAD(nat64lsn_job_head, nat64lsn_job_item);
struct nat64lsn_cfg {
struct named_object no;
struct nat64lsn_portgroup **pg; /* XXX: array of pointers */
struct nat64lsn_host **ih; /* Host hash */
struct nat64lsn_hosts_slist *hosts_hash;
struct nat64lsn_alias *aliases; /* array of aliases */
struct mtx lock;
uint32_t hosts_hashsize;
uint32_t hash_seed;
uint32_t prefix4; /* IPv4 prefix */
uint32_t pmask4; /* IPv4 prefix mask */
uint32_t ihsize; /* IPv6 host hash size */
uint8_t plen4;
uint8_t nomatch_verdict;/* What to return to ipfw on no-match */
uint8_t nomatch_verdict;/* Return value on no-match */
uint32_t ihcount; /* Number of items in host hash */
int max_chunks; /* Max chunks per client */
int agg_prefix_len; /* Prefix length to count */
int agg_prefix_max; /* Max hosts per agg prefix */
uint32_t hosts_count; /* Number of items in host hash */
uint32_t states_chunks; /* Number of states chunks per PG */
uint32_t jmaxlen; /* Max jobqueue length */
uint16_t min_chunk; /* Min port group # to use */
uint16_t max_chunk; /* Max port group # to use */
uint16_t nh_delete_delay; /* Stale host delete delay */
uint16_t host_delete_delay; /* Stale host delete delay */
uint16_t pgchunk_delete_delay;
uint16_t pg_delete_delay; /* Stale portgroup del delay */
uint16_t st_syn_ttl; /* TCP syn expire */
uint16_t st_close_ttl; /* TCP fin expire */
uint16_t st_estab_ttl; /* TCP established expire */
uint16_t st_udp_ttl; /* UDP expire */
uint16_t st_icmp_ttl; /* ICMP expire */
uint32_t protochunks[NAT_MAX_PROTO];/* Number of chunks used */
struct nat64_config base;
#define NAT64LSN_FLAGSMASK (NAT64_LOG | NAT64_ALLOW_PRIVATE)
#define NAT64LSN_ANYPREFIX 0x00000100
struct mtx periodic_lock;
struct callout periodic;
struct callout jcallout;
struct ip_fw_chain *ch;
struct vnet *vp;
struct nat64lsn_job_head jhead;
int jlen;
char name[64]; /* Nat instance name */
};
/* CFG_LOCK protects cfg->hosts_hash from modification */
#define CFG_LOCK_INIT(p) \
mtx_init(&(p)->lock, "cfg_lock", NULL, MTX_DEF)
#define CFG_LOCK_DESTROY(p) mtx_destroy(&(p)->lock)
#define CFG_LOCK(p) mtx_lock(&(p)->lock)
#define CFG_UNLOCK(p) mtx_unlock(&(p)->lock)
#define CALLOUT_LOCK_INIT(p) \
mtx_init(&(p)->periodic_lock, "periodic_lock", NULL, MTX_DEF)
#define CALLOUT_LOCK_DESTROY(p) mtx_destroy(&(p)->periodic_lock)
#define CALLOUT_LOCK(p) mtx_lock(&(p)->periodic_lock)
#define CALLOUT_UNLOCK(p) mtx_unlock(&(p)->periodic_lock)
struct nat64lsn_cfg *nat64lsn_init_instance(struct ip_fw_chain *ch,
size_t numaddr);
in_addr_t prefix, int plen);
void nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg);
void nat64lsn_start_instance(struct nat64lsn_cfg *cfg);
void nat64lsn_init_internal(void);
@ -237,114 +252,4 @@ void nat64lsn_uninit_internal(void);
int ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
ipfw_insn *cmd, int *done);
void
nat64lsn_dump_state(const struct nat64lsn_cfg *cfg,
const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st,
const char *px, int off);
/*
* Portgroup layout
* addr x nat_proto x port_off
*
*/
#define _ADDR_PG_PROTO_COUNT (65536 >> NAT64_CHUNK_SIZE_BITS)
#define _ADDR_PG_COUNT (_ADDR_PG_PROTO_COUNT * NAT_MAX_PROTO)
#define GET_ADDR_IDX(_cfg, _addr) ((_addr) - ((_cfg)->prefix4))
#define __GET_PORTGROUP_IDX(_proto, _port) \
((_proto - 1) * _ADDR_PG_PROTO_COUNT + \
((_port) >> NAT64_CHUNK_SIZE_BITS))
#define _GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port) \
GET_ADDR_IDX(_cfg, _addr) * _ADDR_PG_COUNT + \
__GET_PORTGROUP_IDX(_proto, _port)
#define GET_PORTGROUP(_cfg, _addr, _proto, _port) \
((_cfg)->pg[_GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port)])
#define PORTGROUP_CHUNK(_nh, _idx) \
((_nh)->pg_ptr[(_idx)])
#define PORTGROUP_BYSIDX(_cfg, _nh, _idx) \
(PORTGROUP_CHUNK(_nh, (_idx - 1) / NAT64LSN_PGIDX_CHUNK) \
[((_idx) - 1) % NAT64LSN_PGIDX_CHUNK])
/* Chained hash table */
#define CHT_FIND(_ph, _hsize, _PX, _x, _key) do { \
unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \
_PX##lock(_ph, _buck); \
_x = _PX##first(_ph, _buck); \
for ( ; _x != NULL; _x = _PX##next(_x)) { \
if (_PX##cmp(_key, _PX##val(_x))) \
break; \
} \
if (_x == NULL) \
_PX##unlock(_ph, _buck); \
} while(0)
#define CHT_UNLOCK_BUCK(_ph, _PX, _buck) \
_PX##unlock(_ph, _buck);
#define CHT_UNLOCK_KEY(_ph, _hsize, _PX, _key) do { \
unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \
_PX##unlock(_ph, _buck); \
} while(0)
#define CHT_INSERT_HEAD(_ph, _hsize, _PX, _i) do { \
unsigned int _buck = _PX##hash(_PX##val(_i)) & (_hsize - 1); \
_PX##lock(_ph, _buck); \
_PX##next(_i) = _PX##first(_ph, _buck); \
_PX##first(_ph, _buck) = _i; \
_PX##unlock(_ph, _buck); \
} while(0)
#define CHT_REMOVE(_ph, _hsize, _PX, _x, _tmp, _key) do { \
unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \
_PX##lock(_ph, _buck); \
_x = _PX##first(_ph, _buck); \
_tmp = NULL; \
for ( ; _x != NULL; _tmp = _x, _x = _PX##next(_x)) { \
if (_PX##cmp(_key, _PX##val(_x))) \
break; \
} \
if (_x != NULL) { \
if (_tmp == NULL) \
_PX##first(_ph, _buck) = _PX##next(_x); \
else \
_PX##next(_tmp) = _PX##next(_x); \
} \
_PX##unlock(_ph, _buck); \
} while(0)
#define CHT_FOREACH_SAFE(_ph, _hsize, _PX, _x, _tmp, _cb, _arg) do { \
for (unsigned int _i = 0; _i < _hsize; _i++) { \
_PX##lock(_ph, _i); \
_x = _PX##first(_ph, _i); \
_tmp = NULL; \
for (; _x != NULL; _tmp = _x, _x = _PX##next(_x)) { \
if (_cb(_x, _arg) == 0) \
continue; \
if (_tmp == NULL) \
_PX##first(_ph, _i) = _PX##next(_x); \
else \
_tmp = _PX##next(_x); \
} \
_PX##unlock(_ph, _i); \
} \
} while(0)
#define CHT_RESIZE(_ph, _hsize, _nph, _nhsize, _PX, _x, _y) do { \
unsigned int _buck; \
for (unsigned int _i = 0; _i < _hsize; _i++) { \
_x = _PX##first(_ph, _i); \
_y = _x; \
while (_y != NULL) { \
_buck = _PX##hash(_PX##val(_x)) & (_nhsize - 1);\
_y = _PX##next(_x); \
_PX##next(_x) = _PX##first(_nph, _buck); \
_PX##first(_nph, _buck) = _x; \
} \
} \
} while(0)
#endif /* _IP_FW_NAT64LSN_H_ */

View File

@ -33,6 +33,8 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/ck.h>
#include <sys/epoch.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/lock.h>
@ -43,10 +45,8 @@ __FBSDID("$FreeBSD$");
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/sockopt.h>
#include <sys/queue.h>
#include <net/if.h>
#include <net/pfil.h>
#include <netinet/in.h>
#include <netinet/ip.h>
@ -75,12 +75,6 @@ static void
nat64lsn_default_config(ipfw_nat64lsn_cfg *uc)
{
if (uc->max_ports == 0)
uc->max_ports = NAT64LSN_MAX_PORTS;
else
uc->max_ports = roundup(uc->max_ports, NAT64_CHUNK_SIZE);
if (uc->max_ports > NAT64_CHUNK_SIZE * NAT64LSN_MAXPGPTR)
uc->max_ports = NAT64_CHUNK_SIZE * NAT64LSN_MAXPGPTR;
if (uc->jmaxlen == 0)
uc->jmaxlen = NAT64LSN_JMAXLEN;
if (uc->jmaxlen > 65536)
@ -99,6 +93,13 @@ nat64lsn_default_config(ipfw_nat64lsn_cfg *uc)
uc->st_udp_ttl = NAT64LSN_UDP_AGE;
if (uc->st_icmp_ttl == 0)
uc->st_icmp_ttl = NAT64LSN_ICMP_AGE;
if (uc->states_chunks == 0)
uc->states_chunks = 1;
else if (uc->states_chunks >= 128)
uc->states_chunks = 128;
else if (!powerof2(uc->states_chunks))
uc->states_chunks = 1 << fls(uc->states_chunks);
}
/*
@ -127,12 +128,20 @@ nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
if (ipfw_check_object_name_generic(uc->name) != 0)
return (EINVAL);
if (uc->agg_prefix_len > 127 || uc->set >= IPFW_MAX_SETS)
if (uc->set >= IPFW_MAX_SETS)
return (EINVAL);
if (uc->plen4 > 32)
return (EINVAL);
if (nat64_check_prefix6(&uc->prefix6, uc->plen6) != 0)
/*
* An unspecified address has a special meaning, but it must
* have a valid prefix length. This length will be used to
* correctly extract and embed the IPv4 address into IPv6.
*/
if (nat64_check_prefix6(&uc->prefix6, uc->plen6) != 0 &&
IN6_IS_ADDR_UNSPECIFIED(&uc->prefix6) &&
nat64_check_prefixlen(uc->plen6) != 0)
return (EINVAL);
/* XXX: Check prefix4 to be global */
@ -140,14 +149,6 @@ nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
mask4 = ~((1 << (32 - uc->plen4)) - 1);
if ((addr4 & mask4) != addr4)
return (EINVAL);
if (uc->min_port == 0)
uc->min_port = NAT64_MIN_PORT;
if (uc->max_port == 0)
uc->max_port = 65535;
if (uc->min_port > uc->max_port)
return (EINVAL);
uc->min_port = roundup(uc->min_port, NAT64_CHUNK_SIZE);
uc->max_port = roundup(uc->max_port, NAT64_CHUNK_SIZE);
nat64lsn_default_config(uc);
@ -159,7 +160,7 @@ nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
}
IPFW_UH_RUNLOCK(ch);
cfg = nat64lsn_init_instance(ch, 1 << (32 - uc->plen4));
cfg = nat64lsn_init_instance(ch, addr4, uc->plen4);
strlcpy(cfg->name, uc->name, sizeof(cfg->name));
cfg->no.name = cfg->name;
cfg->no.etlv = IPFW_TLV_NAT64LSN_NAME;
@ -170,20 +171,12 @@ nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
cfg->base.flags = (uc->flags & NAT64LSN_FLAGSMASK) | NAT64_PLATPFX;
if (IN6_IS_ADDR_WKPFX(&cfg->base.plat_prefix))
cfg->base.flags |= NAT64_WKPFX;
else if (IN6_IS_ADDR_UNSPECIFIED(&cfg->base.plat_prefix))
cfg->base.flags |= NAT64LSN_ANYPREFIX;
cfg->prefix4 = addr4;
cfg->pmask4 = addr4 | ~mask4;
cfg->plen4 = uc->plen4;
cfg->max_chunks = uc->max_ports / NAT64_CHUNK_SIZE;
cfg->agg_prefix_len = uc->agg_prefix_len;
cfg->agg_prefix_max = uc->agg_prefix_max;
cfg->min_chunk = uc->min_port / NAT64_CHUNK_SIZE;
cfg->max_chunk = uc->max_port / NAT64_CHUNK_SIZE;
cfg->states_chunks = uc->states_chunks;
cfg->jmaxlen = uc->jmaxlen;
cfg->nh_delete_delay = uc->nh_delete_delay;
cfg->host_delete_delay = uc->nh_delete_delay;
cfg->pg_delete_delay = uc->pg_delete_delay;
cfg->st_syn_ttl = uc->st_syn_ttl;
cfg->st_close_ttl = uc->st_close_ttl;
@ -249,7 +242,7 @@ nat64lsn_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) {
IPFW_UH_WUNLOCK(ch);
return (ESRCH);
return (ENOENT);
}
if (cfg->no.refcnt > 0) {
@ -272,6 +265,8 @@ static void
export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
struct ipfw_nat64lsn_stats *stats)
{
struct nat64lsn_alias *alias;
int i, j;
__COPY_STAT_FIELD(cfg, stats, opcnt64);
__COPY_STAT_FIELD(cfg, stats, opcnt46);
@ -299,10 +294,16 @@ export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
__COPY_STAT_FIELD(cfg, stats, spgcreated);
__COPY_STAT_FIELD(cfg, stats, spgdeleted);
stats->hostcount = cfg->ihcount;
stats->tcpchunks = cfg->protochunks[NAT_PROTO_TCP];
stats->udpchunks = cfg->protochunks[NAT_PROTO_UDP];
stats->icmpchunks = cfg->protochunks[NAT_PROTO_ICMP];
stats->hostcount = cfg->hosts_count;
for (i = 0; i < (1 << (32 - cfg->plen4)); i++) {
alias = &cfg->aliases[i];
for (j = 0; j < 32 && ISSET32(alias->tcp_chunkmask, j); j++)
stats->tcpchunks += bitcount32(alias->tcp_pgmask[j]);
for (j = 0; j < 32 && ISSET32(alias->udp_chunkmask, j); j++)
stats->udpchunks += bitcount32(alias->udp_pgmask[j]);
for (j = 0; j < 32 && ISSET32(alias->icmp_chunkmask, j); j++)
stats->icmpchunks += bitcount32(alias->icmp_pgmask[j]);
}
}
#undef __COPY_STAT_FIELD
@ -312,12 +313,9 @@ nat64lsn_export_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
{
uc->flags = cfg->base.flags & NAT64LSN_FLAGSMASK;
uc->max_ports = cfg->max_chunks * NAT64_CHUNK_SIZE;
uc->agg_prefix_len = cfg->agg_prefix_len;
uc->agg_prefix_max = cfg->agg_prefix_max;
uc->states_chunks = cfg->states_chunks;
uc->jmaxlen = cfg->jmaxlen;
uc->nh_delete_delay = cfg->nh_delete_delay;
uc->nh_delete_delay = cfg->host_delete_delay;
uc->pg_delete_delay = cfg->pg_delete_delay;
uc->st_syn_ttl = cfg->st_syn_ttl;
uc->st_close_ttl = cfg->st_close_ttl;
@ -425,7 +423,7 @@ nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,
cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) {
IPFW_UH_RUNLOCK(ch);
return (EEXIST);
return (ENOENT);
}
nat64lsn_export_config(ch, cfg, uc);
IPFW_UH_RUNLOCK(ch);
@ -438,18 +436,18 @@ nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,
cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) {
IPFW_UH_WUNLOCK(ch);
return (EEXIST);
return (ENOENT);
}
/*
* For now allow to change only following values:
* jmaxlen, nh_del_age, pg_del_age, tcp_syn_age, tcp_close_age,
* tcp_est_age, udp_age, icmp_age, flags, max_ports.
* tcp_est_age, udp_age, icmp_age, flags, states_chunks.
*/
cfg->max_chunks = uc->max_ports / NAT64_CHUNK_SIZE;
cfg->states_chunks = uc->states_chunks;
cfg->jmaxlen = uc->jmaxlen;
cfg->nh_delete_delay = uc->nh_delete_delay;
cfg->host_delete_delay = uc->nh_delete_delay;
cfg->pg_delete_delay = uc->pg_delete_delay;
cfg->st_syn_ttl = uc->st_syn_ttl;
cfg->st_close_ttl = uc->st_close_ttl;
@ -496,7 +494,7 @@ nat64lsn_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) {
IPFW_UH_RUNLOCK(ch);
return (ESRCH);
return (ENOENT);
}
export_stats(ch, cfg, &stats);
@ -538,143 +536,139 @@ nat64lsn_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) {
IPFW_UH_WUNLOCK(ch);
return (ESRCH);
return (ENOENT);
}
COUNTER_ARRAY_ZERO(cfg->base.stats.cnt, NAT64STATS);
IPFW_UH_WUNLOCK(ch);
return (0);
}
/*
 * FREEMASK_COPY(pg, n, out): load the mask of chunk `n` of port group `pg`
 * into the 64-bit lvalue `out`.
 * With __LP64__ defined the value addressed by FREEMASK_CHUNK() is read in
 * a single access. Otherwise two consecutive words are read and combined,
 * the second word supplying the upper 32 bits of `out`.
 * NOTE(review): the non-LP64 branch presumes FREEMASK_CHUNK() yields a
 * pointer to a 32-bit word there - confirm against its definition.
 */
#ifdef __LP64__
#define FREEMASK_COPY(pg, n, out) (out) = *FREEMASK_CHUNK((pg), (n))
#else
#define FREEMASK_COPY(pg, n, out) (out) = *FREEMASK_CHUNK((pg), (n)) | \
((uint64_t)*(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
#endif
/*
* Reply: [ ipfw_obj_header ipfw_obj_data [ ipfw_nat64lsn_stg
* ipfw_nat64lsn_state x count, ... ] ]
*/
/*
 * Export the busy states of one chunk of a port group into the reply buffer.
 * NOTE(review): this listing appears to interleave the earlier
 * export_pg_states() with nat64lsn_export_states_v1(); the notes here
 * describe nat64lsn_export_states_v1().
 *
 * idx       - position being exported; idx->chunk selects the states chunk.
 * pg        - port group whose states are exported.
 * sd        - sockopt buffer that receives ipfw_nat64lsn_state_v1 records.
 * ret_count - set to the number of records written (only on that path).
 *
 * Returns EINVAL when idx->chunk is past the last chunk of pg, ENOMEM when
 * the reply buffer has no room for the records, 0 otherwise (including the
 * case when the chunk has no busy states and nothing is written).
 */
static int
export_pg_states(struct nat64lsn_cfg *cfg, struct nat64lsn_portgroup *pg,
ipfw_nat64lsn_stg *stg, struct sockopt_data *sd)
nat64lsn_export_states_v1(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx,
struct nat64lsn_pg *pg, struct sockopt_data *sd, uint32_t *ret_count)
{
ipfw_nat64lsn_state *ste;
struct nat64lsn_state *st;
int i, count;
ipfw_nat64lsn_state_v1 *s;
struct nat64lsn_state *state;
uint64_t freemask;
uint32_t i, count;
/* validate user input */
if (idx->chunk > pg->chunks_count - 1)
return (EINVAL);
/* Take a copy of the chunk's mask; a set bit is treated as a free slot */
FREEMASK_COPY(pg, idx->chunk, freemask);
/* Busy states = 64 slots minus the free ones */
count = 64 - bitcount64(freemask);
if (count == 0)
return (0); /* Try next PG/chunk */
DPRINTF(DP_STATE, "EXPORT PG 0x%16jx, count %d",
(uintmax_t)idx->index, count);
/* Reserve reply space for exactly `count` records */
s = (ipfw_nat64lsn_state_v1 *)ipfw_get_sopt_space(sd,
count * sizeof(ipfw_nat64lsn_state_v1));
if (s == NULL)
return (ENOMEM);
NAT64_LOCK(pg->host);
count = 0;
for (i = 0; i < 64; i++) {
if (PG_IS_BUSY_IDX(pg, i))
count++;
}
DPRINTF(DP_STATE, "EXPORT PG %d, count %d", pg->idx, count);
if (count == 0) {
stg->count = 0;
NAT64_UNLOCK(pg->host);
return (0);
}
ste = (ipfw_nat64lsn_state *)ipfw_get_sopt_space(sd,
count * sizeof(ipfw_nat64lsn_state));
if (ste == NULL) {
NAT64_UNLOCK(pg->host);
return (1);
}
stg->alias4.s_addr = pg->aaddr;
stg->proto = nat64lsn_rproto_map[pg->nat_proto];
stg->flags = 0;
stg->host6 = pg->host->addr;
stg->count = count;
for (i = 0; i < 64; i++) {
if (PG_IS_FREE_IDX(pg, i))
if (ISSET64(freemask, i))
continue;
st = &pg->states[i];
ste->daddr.s_addr = st->u.s.faddr;
ste->dport = st->u.s.fport;
ste->aport = pg->aport + i;
ste->sport = st->u.s.lport;
ste->flags = st->flags; /* XXX filter flags */
ste->idle = GET_AGE(st->timestamp);
ste++;
}
NAT64_UNLOCK(pg->host);
/*
 * A port group with a single chunk is read through pg->states;
 * otherwise the chunk is picked from pg->states_chunk[].
 */
state = pg->chunks_count == 1 ? &pg->states->state[i] :
&pg->states_chunk[idx->chunk]->state[i];
s->host6 = state->host->addr;
/* ip_dst is converted with htonl() for the reply */
s->daddr.s_addr = htonl(state->ip_dst);
s->dport = state->dport;
s->sport = state->sport;
s->aport = state->aport;
/* Only the three low flag bits are exported */
s->flags = (uint8_t)(state->flags & 7);
s->proto = state->proto;
s->idle = GET_AGE(state->timestamp);
s++;
}
*ret_count = count;
return (0);
}
/* Value stored in idx->index once the iteration has no more positions */
#define LAST_IDX 0xFF
/*
 * Advance the iteration index `idx` to the next position.
 * NOTE(review): this listing appears to interleave the earlier
 * get_next_idx(), PACK_IDX/UNPACK_IDX and get_next_pg() with
 * nat64lsn_next_pgidx(); the notes here describe nat64lsn_next_pgidx().
 *
 * Order of advance: the chunk inside `pg` (only when pg is not NULL), then
 * the port in steps of 64, then the protocol ICMP -> TCP -> UDP, then the
 * IPv4 alias address up to cfg->pmask4.
 *
 * Returns 0 when the index moved but stayed on the same alias address,
 * 1 when it moved to the next alias address, and -1 when nothing is left
 * (idx->index is then set to LAST_IDX).
 */
static int
get_next_idx(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto,
uint16_t *port)
nat64lsn_next_pgidx(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg,
union nat64lsn_pgidx *idx)
{
if (*port < 65536 - NAT64_CHUNK_SIZE) {
*port += NAT64_CHUNK_SIZE;
return (0);
}
*port = 0;
if (*nat_proto < NAT_MAX_PROTO - 1) {
*nat_proto += 1;
return (0);
}
*nat_proto = 1;
if (*addr < cfg->pmask4) {
*addr += 1;
return (0);
}
/* End of space. */
return (1);
}
#define PACK_IDX(addr, proto, port) \
((uint64_t)addr << 32) | ((uint32_t)port << 16) | (proto << 8)
#define UNPACK_IDX(idx, addr, proto, port) \
(addr) = (uint32_t)((idx) >> 32); \
(port) = (uint16_t)(((idx) >> 16) & 0xFFFF); \
(proto) = (uint8_t)(((idx) >> 8) & 0xFF)
static struct nat64lsn_portgroup *
get_next_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto,
uint16_t *port)
{
struct nat64lsn_portgroup *pg;
uint64_t pre_pack, post_pack;
pg = NULL;
pre_pack = PACK_IDX(*addr, *nat_proto, *port);
for (;;) {
if (get_next_idx(cfg, addr, nat_proto, port) != 0) {
/* End of states */
return (pg);
/* First iterate over chunks */
if (pg != NULL) {
if (idx->chunk < pg->chunks_count - 1) {
idx->chunk++;
return (0);
}
pg = GET_PORTGROUP(cfg, *addr, *nat_proto, *port);
if (pg != NULL)
break;
}
post_pack = PACK_IDX(*addr, *nat_proto, *port);
if (pre_pack == post_pack)
DPRINTF(DP_STATE, "XXX: PACK_IDX %u %d %d",
*addr, *nat_proto, *port);
return (pg);
idx->chunk = 0;
/* Then over PGs */
if (idx->port < UINT16_MAX - 64) {
idx->port += 64;
return (0);
}
/* Port range exhausted: restart from the first port for the next protocol */
idx->port = NAT64_MIN_PORT;
/* Then over supported protocols */
switch (idx->proto) {
case IPPROTO_ICMP:
idx->proto = IPPROTO_TCP;
return (0);
case IPPROTO_TCP:
idx->proto = IPPROTO_UDP;
return (0);
default:
/* Last protocol done: reset to ICMP and fall out to the address step */
idx->proto = IPPROTO_ICMP;
}
/* And then over IPv4 alias addresses */
if (idx->addr < cfg->pmask4) {
idx->addr++;
return (1); /* New states group is needed */
}
idx->index = LAST_IDX;
return (-1); /* No more states */
}
/*
 * Look up the port group addressed by `idx`.
 * NOTE(review): this listing appears to interleave the earlier
 * get_first_pg() with nat64lsn_get_pg_byidx(); the notes here describe
 * nat64lsn_get_pg_byidx().
 *
 * The alias entry is selected from cfg->aliases[] by the low
 * (32 - cfg->plen4) bits of idx->addr. The port group number is
 * (idx->port - NAT64_MIN_PORT) / 64; its bit is tested in the per-protocol
 * pgmask word pg_idx / 32 at position pg_idx % 32.
 *
 * Returns the port group pointer when that bit is set, NULL when it is not
 * set or idx->proto is not ICMP, TCP or UDP.
 */
static NAT64NOINLINE struct nat64lsn_portgroup *
get_first_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto,
uint16_t *port)
static struct nat64lsn_pg*
nat64lsn_get_pg_byidx(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx)
{
struct nat64lsn_portgroup *pg;
struct nat64lsn_alias *alias;
int pg_idx;
pg = GET_PORTGROUP(cfg, *addr, *nat_proto, *port);
if (pg == NULL)
pg = get_next_pg(cfg, addr, nat_proto, port);
/* Host part of the address is the index into the aliases array */
alias = &cfg->aliases[idx->addr & ((1 << (32 - cfg->plen4)) - 1)];
MPASS(alias->addr == idx->addr);
return (pg);
/* Each port group covers 64 ports counted from NAT64_MIN_PORT */
pg_idx = (idx->port - NAT64_MIN_PORT) / 64;
switch (idx->proto) {
case IPPROTO_ICMP:
if (ISSET32(alias->icmp_pgmask[pg_idx / 32], pg_idx % 32))
return (alias->icmp[pg_idx / 32]->pgptr[pg_idx % 32]);
break;
case IPPROTO_TCP:
if (ISSET32(alias->tcp_pgmask[pg_idx / 32], pg_idx % 32))
return (alias->tcp[pg_idx / 32]->pgptr[pg_idx % 32]);
break;
case IPPROTO_UDP:
if (ISSET32(alias->udp_pgmask[pg_idx / 32], pg_idx % 32))
return (alias->udp[pg_idx / 32]->pgptr[pg_idx % 32]);
break;
}
return (NULL);
}
/*
* Lists nat64lsn states.
* Data layout (v0)(current):
* Data layout (v0):
* Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
* Reply: [ ipfw_obj_header ipfw_obj_data [
* ipfw_nat64lsn_stg ipfw_nat64lsn_state x N] ]
@ -682,19 +676,36 @@ get_first_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto,
* Returns 0 on success
*/
/*
 * Handler for the v0 states listing request.
 * NOTE(review): two signature lines appear here (nat64lsn_states and
 * nat64lsn_states_v0); the notes describe nat64lsn_states_v0().
 * The arguments are not used: the function always returns EOPNOTSUPP.
 */
static int
nat64lsn_states(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
nat64lsn_states_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
struct sockopt_data *sd)
{
/* TODO: implement states listing for old ipfw(8) binaries */
return (EOPNOTSUPP);
}
/*
* Lists nat64lsn states.
* Data layout (v1)(current):
* Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
* Reply: [ ipfw_obj_header ipfw_obj_data [
* ipfw_nat64lsn_stg_v1 ipfw_nat64lsn_state_v1 x N] ]
*
* Returns 0 on success
*/
static int
nat64lsn_states_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
struct sockopt_data *sd)
{
ipfw_obj_header *oh;
ipfw_obj_data *od;
ipfw_nat64lsn_stg *stg;
ipfw_nat64lsn_stg_v1 *stg;
struct nat64lsn_cfg *cfg;
struct nat64lsn_portgroup *pg, *pg_next;
uint64_t next_idx;
struct nat64lsn_pg *pg;
union nat64lsn_pgidx idx;
size_t sz;
uint32_t addr, states;
uint16_t port;
uint8_t nat_proto;
uint32_t count, total;
int ret;
sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
sizeof(uint64_t);
@ -708,78 +719,96 @@ nat64lsn_states(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
od->head.length != sz - sizeof(ipfw_obj_header))
return (EINVAL);
next_idx = *(uint64_t *)(od + 1);
/* Translate index to the request position to start from */
UNPACK_IDX(next_idx, addr, nat_proto, port);
if (nat_proto >= NAT_MAX_PROTO)
idx.index = *(uint64_t *)(od + 1);
if (idx.index != 0 && idx.proto != IPPROTO_ICMP &&
idx.proto != IPPROTO_TCP && idx.proto != IPPROTO_UDP)
return (EINVAL);
if (nat_proto == 0 && addr != 0)
if (idx.index == LAST_IDX)
return (EINVAL);
IPFW_UH_RLOCK(ch);
cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
if (cfg == NULL) {
IPFW_UH_RUNLOCK(ch);
return (ESRCH);
return (ENOENT);
}
/* Fill in starting point */
if (addr == 0) {
addr = cfg->prefix4;
nat_proto = 1;
port = 0;
if (idx.index == 0) { /* Fill in starting point */
idx.addr = cfg->prefix4;
idx.proto = IPPROTO_ICMP;
idx.port = NAT64_MIN_PORT;
}
if (addr < cfg->prefix4 || addr > cfg->pmask4) {
if (idx.addr < cfg->prefix4 || idx.addr > cfg->pmask4 ||
idx.port < NAT64_MIN_PORT) {
IPFW_UH_RUNLOCK(ch);
DPRINTF(DP_GENERIC | DP_STATE, "XXX: %ju %u %u",
(uintmax_t)next_idx, addr, cfg->pmask4);
return (EINVAL);
}
sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
sizeof(ipfw_nat64lsn_stg);
if (sd->valsize < sz)
sizeof(ipfw_nat64lsn_stg_v1);
if (sd->valsize < sz) {
IPFW_UH_RUNLOCK(ch);
return (ENOMEM);
}
oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, sz);
od = (ipfw_obj_data *)(oh + 1);
od->head.type = IPFW_TLV_OBJDATA;
od->head.length = sz - sizeof(ipfw_obj_header);
stg = (ipfw_nat64lsn_stg *)(od + 1);
pg = get_first_pg(cfg, &addr, &nat_proto, &port);
if (pg == NULL) {
/* No states */
stg->next_idx = 0xFF;
stg->count = 0;
IPFW_UH_RUNLOCK(ch);
return (0);
}
states = 0;
pg_next = NULL;
while (pg != NULL) {
pg_next = get_next_pg(cfg, &addr, &nat_proto, &port);
if (pg_next == NULL)
stg->next_idx = 0xFF;
else
stg->next_idx = PACK_IDX(addr, nat_proto, port);
if (export_pg_states(cfg, pg, stg, sd) != 0) {
IPFW_UH_RUNLOCK(ch);
return (states == 0 ? ENOMEM: 0);
}
states += stg->count;
od->head.length += stg->count * sizeof(ipfw_nat64lsn_state);
sz += stg->count * sizeof(ipfw_nat64lsn_state);
if (pg_next != NULL) {
sz += sizeof(ipfw_nat64lsn_stg);
if (sd->valsize < sz)
stg = (ipfw_nat64lsn_stg_v1 *)(od + 1);
stg->count = total = 0;
stg->next.index = idx.index;
/*
* Acquire CALLOUT_LOCK to avoid races with expiration code.
* Thus states, hosts and PGs will not expire while we hold it.
*/
CALLOUT_LOCK(cfg);
ret = 0;
do {
pg = nat64lsn_get_pg_byidx(cfg, &idx);
if (pg != NULL) {
count = 0;
ret = nat64lsn_export_states_v1(cfg, &idx, pg,
sd, &count);
if (ret != 0)
break;
stg = (ipfw_nat64lsn_stg *)ipfw_get_sopt_space(sd,
sizeof(ipfw_nat64lsn_stg));
if (count > 0) {
stg->count += count;
total += count;
/* Update total size of reply */
od->head.length +=
count * sizeof(ipfw_nat64lsn_state_v1);
sz += count * sizeof(ipfw_nat64lsn_state_v1);
}
stg->alias4.s_addr = htonl(idx.addr);
}
pg = pg_next;
}
/* Determine new index */
switch (nat64lsn_next_pgidx(cfg, pg, &idx)) {
case -1:
ret = ENOENT; /* End of search */
break;
case 1: /*
* Next alias address, new group may be needed.
* If states count is zero, use this group.
*/
if (stg->count == 0)
continue;
/* Otherwise try to create new group */
sz += sizeof(ipfw_nat64lsn_stg_v1);
if (sd->valsize < sz) {
ret = ENOMEM;
break;
}
/* Save next index in current group */
stg->next.index = idx.index;
stg = (ipfw_nat64lsn_stg_v1 *)ipfw_get_sopt_space(sd,
sizeof(ipfw_nat64lsn_stg_v1));
od->head.length += sizeof(ipfw_nat64lsn_stg_v1);
stg->count = 0;
break;
}
stg->next.index = idx.index;
} while (ret == 0);
CALLOUT_UNLOCK(cfg);
IPFW_UH_RUNLOCK(ch);
return (0);
return ((total > 0 || idx.index == LAST_IDX) ? 0: ret);
}
static struct ipfw_sopt_handler scodes[] = {
@ -789,7 +818,8 @@ static struct ipfw_sopt_handler scodes[] = {
{ IP_FW_NAT64LSN_LIST, 0, HDIR_GET, nat64lsn_list },
{ IP_FW_NAT64LSN_STATS, 0, HDIR_GET, nat64lsn_stats },
{ IP_FW_NAT64LSN_RESET_STATS,0, HDIR_SET, nat64lsn_reset_stats },
{ IP_FW_NAT64LSN_LIST_STATES,0, HDIR_GET, nat64lsn_states },
{ IP_FW_NAT64LSN_LIST_STATES,0, HDIR_GET, nat64lsn_states_v0 },
{ IP_FW_NAT64LSN_LIST_STATES,1, HDIR_GET, nat64lsn_states_v1 },
};
static int