PFIL_MEMPTR for ipfw link level hook

With the new pfil(9) KPI it is possible to pass a void pointer with a length
instead of an mbuf pointer to a packet filter. Until this commit no filters
supported that, so pfil ran them through a shim function, pfil_fake_mbuf().

Now the ipfw(4) hook named "default-link", which is instantiated when the
net.link.ether.ipfw sysctl is on, supports processing pointer/length
packets natively.

- ip_fw_args now has a union for either an mbuf or a void *, and if the flags
  carry a non-zero length, then we use the void *.
- Throughout ipfw_chk() we handle the mem/mbuf cases differently.
- ether_header goes away from args. It is ipfw_chk()'s responsibility
  to parse the Ethernet header.
- ipfw_log() now uses different bpf APIs to log packets.

Although ipfw_chk() is now capable of processing pointer/length packets,
this commit adds support for the link level hook only, see
ipfw_check_frame(). Potentially the IP processing hook ipfw_check_packet()
can be improved too, but that requires more changes since the hook
supports more complex actions: NAT, divert, etc.

Reviewed by:	ae
Differential Revision:	https://reviews.freebsd.org/D19357
This commit is contained in:
Gleb Smirnoff 2019-03-14 22:52:16 +00:00
parent dc0fa4f712
commit f355cb3e6f
5 changed files with 177 additions and 113 deletions

View File

@ -1258,7 +1258,6 @@ jump_linear(struct ip_fw_chain *chain, struct ip_fw *f, int num,
*
* args->m (in/out) The packet; we set to NULL when/if we nuke it.
* Starts with the IP header.
* args->eh (in) Mac header if present, NULL for layer3 packet.
* args->L3offset Number of bytes bypassed if we came from L2.
* e.g. often sizeof(eh) ** NOTYET **
* args->ifp Incoming or outgoing interface.
@ -1297,23 +1296,19 @@ ipfw_chk(struct ip_fw_args *args)
* the implementation of the various instructions to make sure
* that they still work.
*
* args->eh The MAC header. It is non-null for a layer2
* packet, it is NULL for a layer-3 packet.
* **notyet**
* args->L3offset Offset in the packet to the L3 (IP or equiv.) header.
*
* m | args->m Pointer to the mbuf, as received from the caller.
* It may change if ipfw_chk() does an m_pullup, or if it
* consumes the packet because it calls send_reject().
* XXX This has to change, so that ipfw_chk() never modifies
* or consumes the buffer.
* ip is the beginning of the ip(4 or 6) header.
* Calculated by adding the L3offset to the start of data.
* (Until we start using L3offset, the packet is
* supposed to start with the ip header).
* OR
* args->mem Pointer to contiguous memory chunk.
* ip Is the beginning of the ip(4 or 6) header.
* eh Ethernet header if the input is Layer2.
*/
struct mbuf *m = args->m;
struct ip *ip = mtod(m, struct ip *);
struct mbuf *m;
struct ip *ip;
struct ether_header *eh;
/*
* For rules which contain uid/gid or jail constraints, cache
@ -1370,7 +1365,6 @@ ipfw_chk(struct ip_fw_args *args)
struct in_addr src_ip, dst_ip; /* NOTE: network format */
int iplen = 0;
int pktlen;
uint16_t etype; /* Host order stored ether type */
struct ipfw_dyn_info dyn_info;
struct ip_fw *q = NULL;
@ -1394,14 +1388,45 @@ ipfw_chk(struct ip_fw_args *args)
int done = 0; /* flag to exit the outer loop */
IPFW_RLOCK_TRACKER;
bool mem;
if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready))
return (IP_FW_PASS); /* accept */
if ((mem = (args->flags & IPFW_ARGS_LENMASK))) {
if (args->flags & IPFW_ARGS_ETHER) {
eh = (struct ether_header *)args->mem;
if (eh->ether_type == htons(ETHERTYPE_VLAN))
ip = (struct ip *)
((struct ether_vlan_header *)eh + 1);
else
ip = (struct ip *)(eh + 1);
} else {
eh = NULL;
ip = (struct ip *)args->mem;
}
pktlen = IPFW_ARGS_LENGTH(args->flags);
args->f_id.fib = args->ifp->if_fib; /* best guess */
} else {
m = args->m;
if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready))
return (IP_FW_PASS); /* accept */
if (args->flags & IPFW_ARGS_ETHER) {
/* We need some amount of data to be contiguous. */
if (m->m_len < min(m->m_pkthdr.len, max_protohdr) &&
(args->m = m = m_pullup(m, min(m->m_pkthdr.len,
max_protohdr))) == NULL)
goto pullup_failed;
eh = mtod(m, struct ether_header *);
ip = (struct ip *)(eh + 1);
} else {
eh = NULL;
ip = mtod(m, struct ip *);
}
pktlen = m->m_pkthdr.len;
args->f_id.fib = M_GETFIB(m); /* mbuf not altered */
}
dst_ip.s_addr = 0; /* make sure it is initialized */
src_ip.s_addr = 0; /* make sure it is initialized */
src_port = dst_port = 0;
pktlen = m->m_pkthdr.len;
DYN_INFO_INIT(&dyn_info);
/*
@ -1411,28 +1436,41 @@ ipfw_chk(struct ip_fw_args *args)
* this way).
*/
#define PULLUP_TO(_len, p, T) PULLUP_LEN(_len, p, sizeof(T))
#define EHLEN (eh != NULL ? ((char *)ip - (char *)eh) : 0)
#define PULLUP_LEN(_len, p, T) \
do { \
int x = (_len) + T; \
if ((m)->m_len < x) { \
args->m = m = m_pullup(m, x); \
if (m == NULL) \
goto pullup_failed; \
int x = (_len) + T + EHLEN; \
if (mem) { \
MPASS(pktlen >= x); \
p = (char *)args->mem + (_len) + EHLEN; \
} else { \
if (__predict_false((m)->m_len < x)) { \
args->m = m = m_pullup(m, x); \
if (m == NULL) \
goto pullup_failed; \
} \
p = mtod(m, char *) + (_len) + EHLEN; \
} \
} while (0)
/*
* In case pointers got stale after pullups, update them.
*/
#define UPDATE_POINTERS() \
do { \
if (!mem) { \
if (eh != NULL) { \
eh = mtod(m, struct ether_header *); \
ip = (struct ip *)(eh + 1); \
} else \
ip = mtod(m, struct ip *); \
args->m = m; \
} \
p = (mtod(m, char *) + (_len)); \
} while (0)
/*
* if we have an ether header,
*/
if (args->flags & IPFW_ARGS_ETHER)
etype = ntohs(args->eh->ether_type);
else
etype = 0;
/* Identify IP packets and fill up variables. */
if (pktlen >= sizeof(struct ip6_hdr) &&
(etype == 0 || etype == ETHERTYPE_IPV6) && ip->ip_v == 6) {
(eh == NULL || eh->ether_type == htons(ETHERTYPE_IPV6)) &&
ip->ip_v == 6) {
struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
is_ipv6 = 1;
@ -1609,7 +1647,7 @@ do { \
break;
} /*switch */
}
ip = mtod(m, struct ip *);
UPDATE_POINTERS();
ip6 = (struct ip6_hdr *)ip;
args->f_id.addr_type = 6;
args->f_id.src_ip6 = ip6->ip6_src;
@ -1617,7 +1655,8 @@ do { \
args->f_id.flow_id6 = ntohl(ip6->ip6_flow);
iplen = ntohs(ip6->ip6_plen) + sizeof(*ip6);
} else if (pktlen >= sizeof(struct ip) &&
(etype == 0 || etype == ETHERTYPE_IP) && ip->ip_v == 4) {
(eh == NULL || eh->ether_type == htons(ETHERTYPE_IP)) &&
ip->ip_v == 4) {
is_ipv4 = 1;
args->flags |= IPFW_ARGS_IP4;
hlen = ip->ip_hl << 2;
@ -1675,7 +1714,7 @@ do { \
}
}
ip = mtod(m, struct ip *);
UPDATE_POINTERS();
args->f_id.addr_type = 4;
args->f_id.src_ip = ntohl(src_ip.s_addr);
args->f_id.dst_ip = ntohl(dst_ip.s_addr);
@ -1692,7 +1731,6 @@ do { \
args->f_id.proto = proto;
args->f_id.src_port = src_port = ntohs(src_port);
args->f_id.dst_port = dst_port = ntohs(dst_port);
args->f_id.fib = M_GETFIB(m);
IPFW_PF_RLOCK(chain);
if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */
@ -1720,7 +1758,7 @@ do { \
oif = NULL;
} else {
MPASS(args->flags & IPFW_ARGS_OUT);
iif = m->m_pkthdr.rcvif;
iif = mem ? NULL : m->m_pkthdr.rcvif;
oif = args->ifp;
}
@ -1840,7 +1878,7 @@ do { \
((ipfw_insn_mac *)cmd)->addr;
u_int32_t *mask = (u_int32_t *)
((ipfw_insn_mac *)cmd)->mask;
u_int32_t *hdr = (u_int32_t *)args->eh;
u_int32_t *hdr = (u_int32_t *)eh;
match =
( want[0] == (hdr[0] & mask[0]) &&
@ -1857,8 +1895,11 @@ do { \
for (i = cmdlen - 1; !match && i>0;
i--, p += 2)
match = (etype >= p[0] &&
etype <= p[1]);
match =
(ntohs(eh->ether_type) >=
p[0] &&
ntohs(eh->ether_type) <=
p[1]);
}
break;
@ -2332,7 +2373,7 @@ do { \
}
case O_LOG:
ipfw_log(chain, f, hlen, args, m,
ipfw_log(chain, f, hlen, args,
offset | ip6f_mf, tablearg, ip);
match = 1;
break;

View File

@ -160,6 +160,28 @@ ipfwlog_clone_create(struct if_clone *ifc, int unit, caddr_t params)
return (0);
}
/*
 * Hand a packet held in a flat memory buffer to bpf via the ipfw log
 * interface.
 *
 * pkt		Start of the packet data.
 * pktlen	Number of bytes at pkt.
 *
 * Does nothing when V_log_if is NULL.  ipfw_log() uses this for
 * pointer/length packets (those with a non-zero length in args->flags).
 */
void
ipfw_bpf_tap(u_char *pkt, u_int pktlen)
{
LOGIF_RLOCK_TRACKER;
/* NOTE(review): the LOGIF read lock presumably protects V_log_if - confirm. */
LOGIF_RLOCK();
/* Tap only when V_log_if is set. */
if (V_log_if != NULL)
BPF_TAP(V_log_if, pkt, pktlen);
LOGIF_RUNLOCK();
}
/*
 * Hand a packet held in an mbuf chain to bpf via the ipfw log interface.
 *
 * m		The mbuf chain to tap; passed as is, with no header
 *		prepended (compare ipfw_bpf_mtap2()).
 *
 * Does nothing when V_log_if is NULL.  ipfw_log() uses this for layer2
 * mbuf packets.
 */
void
ipfw_bpf_mtap(struct mbuf *m)
{
LOGIF_RLOCK_TRACKER;
/* NOTE(review): the LOGIF read lock presumably protects V_log_if - confirm. */
LOGIF_RLOCK();
/* Tap only when V_log_if is set. */
if (V_log_if != NULL)
BPF_MTAP(V_log_if, m);
LOGIF_RUNLOCK();
}
void
ipfw_bpf_mtap2(void *data, u_int dlen, struct mbuf *m)
{

View File

@ -99,30 +99,32 @@ __FBSDID("$FreeBSD$");
*/
void
ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
struct ip_fw_args *args, struct mbuf *m,
u_short offset, uint32_t tablearg, struct ip *ip)
struct ip_fw_args *args, u_short offset, uint32_t tablearg, struct ip *ip)
{
char *action;
int limit_reached = 0;
char action2[92], proto[128], fragment[32];
if (V_fw_verbose == 0) {
if (args->flags & IPFW_ARGS_ETHER) /* layer2, use orig hdr */
ipfw_bpf_mtap2(args->eh, ETHER_HDR_LEN, m);
if (args->flags & IPFW_ARGS_LENMASK)
ipfw_bpf_tap(args->mem, IPFW_ARGS_LENGTH(args->flags));
else if (args->flags & IPFW_ARGS_ETHER)
/* layer2, use orig hdr */
ipfw_bpf_mtap(args->m);
else {
/* Add fake header. Later we will store
* more info in the header.
*/
if (ip->ip_v == 4)
ipfw_bpf_mtap2("DDDDDDSSSSSS\x08\x00",
ETHER_HDR_LEN, m);
ETHER_HDR_LEN, args->m);
else if (ip->ip_v == 6)
ipfw_bpf_mtap2("DDDDDDSSSSSS\x86\xdd",
ETHER_HDR_LEN, m);
ETHER_HDR_LEN, args->m);
else
/* Obviously bogus EtherType. */
ipfw_bpf_mtap2("DDDDDDSSSSSS\xff\xff",
ETHER_HDR_LEN, m);
ETHER_HDR_LEN, args->m);
}
return;
}

View File

@ -328,69 +328,50 @@ ipfw_check_packet(struct mbuf **m0, struct ifnet *ifp, int flags,
* ipfw processing for ethernet packets (in and out).
*/
static pfil_return_t
ipfw_check_frame(struct mbuf **m0, struct ifnet *ifp, int flags,
ipfw_check_frame(pfil_packet_t p, struct ifnet *ifp, int flags,
void *ruleset __unused, struct inpcb *inp)
{
struct ip_fw_args args;
struct ether_header save_eh;
struct ether_header *eh;
struct m_tag *mtag;
struct mbuf *m;
pfil_return_t ret;
int i;
bool mem, realloc;
int ipfw;
args.flags = IPFW_ARGS_ETHER;
if (flags & PFIL_MEMPTR) {
mem = true;
realloc = false;
args.flags = PFIL_LENGTH(flags) | IPFW_ARGS_ETHER;
args.mem = p.mem;
} else {
mem = realloc = false;
args.flags = IPFW_ARGS_ETHER;
}
args.flags |= (flags & PFIL_IN) ? IPFW_ARGS_IN : IPFW_ARGS_OUT;
again:
/* fetch start point from rule, if any. remove the tag if present. */
mtag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL);
if (mtag != NULL) {
args.rule = *((struct ipfw_rule_ref *)(mtag+1));
m_tag_delete(*m0, mtag);
if (args.rule.info & IPFW_ONEPASS)
return (0);
args.flags |= IPFW_ARGS_REF;
}
/* I need some amt of data to be contiguous */
m = *m0;
i = min(m->m_pkthdr.len, max_protohdr);
if (m->m_len < i) {
m = m_pullup(m, i);
if (m == NULL) {
*m0 = m;
return (0);
}
}
eh = mtod(m, struct ether_header *);
save_eh = *eh; /* save copy for restore below */
m_adj(m, ETHER_HDR_LEN); /* strip ethernet header */
args.m = m; /* the packet we are looking at */
args.ifp = ifp;
args.eh = &save_eh; /* MAC header for bridged/MAC packets */
args.inp = inp; /* used by ipfw uid/gid/jail rules */
i = ipfw_chk(&args);
m = args.m;
if (m != NULL) {
args.inp = inp;
again:
if (!mem) {
/*
* Restore Ethernet header, as needed, in case the
* mbuf chain was replaced by ipfw.
* Fetch start point from rule, if any.
* Remove the tag if present.
*/
M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
if (m == NULL) {
*m0 = NULL;
return (0);
struct m_tag *mtag;
mtag = m_tag_locate(*p.m, MTAG_IPFW_RULE, 0, NULL);
if (mtag != NULL) {
args.rule = *((struct ipfw_rule_ref *)(mtag+1));
m_tag_delete(*p.m, mtag);
if (args.rule.info & IPFW_ONEPASS)
return (PFIL_PASS);
args.flags |= IPFW_ARGS_REF;
}
if (eh != mtod(m, struct ether_header *))
bcopy(&save_eh, mtod(m, struct ether_header *),
ETHER_HDR_LEN);
args.m = *p.m;
}
*m0 = m;
ipfw = ipfw_chk(&args);
ret = PFIL_PASS;
/* Check result of ipfw_chk() */
switch (i) {
switch (ipfw) {
case IP_FW_PASS:
break;
@ -403,9 +384,16 @@ ipfw_check_frame(struct mbuf **m0, struct ifnet *ifp, int flags,
ret = PFIL_DROPPED;
break;
}
*m0 = NULL;
if (mem) {
if (pfil_realloc(&p, flags, ifp) != 0) {
ret = PFIL_DROPPED;
break;
}
mem = false;
realloc = true;
}
MPASS(args.flags & IPFW_ARGS_REF);
ip_dn_io_ptr(&m, &args);
ip_dn_io_ptr(p.m, &args);
return (PFIL_CONSUMED);
case IP_FW_NGTEE:
@ -414,9 +402,17 @@ ipfw_check_frame(struct mbuf **m0, struct ifnet *ifp, int flags,
ret = PFIL_DROPPED;
break;
}
if (mem) {
if (pfil_realloc(&p, flags, ifp) != 0) {
ret = PFIL_DROPPED;
break;
}
mem = false;
realloc = true;
}
MPASS(args.flags & IPFW_ARGS_REF);
(void )ng_ipfw_input_p(m0, &args, i == IP_FW_NGTEE);
if (i == IP_FW_NGTEE) /* ignore errors for NGTEE */
(void )ng_ipfw_input_p(p.m, &args, ipfw == IP_FW_NGTEE);
if (ipfw == IP_FW_NGTEE) /* ignore errors for NGTEE */
goto again; /* continue with packet */
ret = PFIL_CONSUMED;
break;
@ -425,12 +421,15 @@ ipfw_check_frame(struct mbuf **m0, struct ifnet *ifp, int flags,
KASSERT(0, ("%s: unknown retval", __func__));
}
if (ret != PFIL_PASS) {
if (*m0)
FREE_PKT(*m0);
*m0 = NULL;
if (!mem && ret != PFIL_PASS) {
if (*p.m)
FREE_PKT(*p.m);
*p.m = NULL;
}
if (realloc && ret == PFIL_PASS)
ret = PFIL_REALLOCED;
return (ret);
}
@ -545,7 +544,7 @@ ipfw_hook(int onoff, int pf)
pfil_hook_t *h;
pha.pa_version = PFIL_VERSION;
pha.pa_flags = PFIL_IN | PFIL_OUT;
pha.pa_flags = PFIL_IN | PFIL_OUT | PFIL_MEMPTR;
pha.pa_modname = "ipfw";
pha.pa_ruleset = NULL;

View File

@ -111,14 +111,11 @@ struct ip_fw_args {
struct inpcb *inp;
union {
/*
* We don't support forwarding on layer2, thus we can
* keep eh pointer in this union.
* next_hop[6] pointers can be used to point to next hop
* stored in rule's opcode to avoid copying into hopstore.
* Also, it is expected that all 0x1-0x10 flags are mutually
* exclusive.
*/
struct ether_header *eh; /* for bridged packets */
struct sockaddr_in *next_hop;
struct sockaddr_in6 *next_hop6;
/* ipfw next hop storage */
@ -129,8 +126,10 @@ struct ip_fw_args {
uint16_t sin6_port;
} hopstore6;
};
struct mbuf *m; /* the mbuf chain */
union {
struct mbuf *m; /* the mbuf chain */
void *mem; /* or memory pointer */
};
struct ipfw_flow_id f_id; /* grabbed from IP header */
};
@ -164,10 +163,11 @@ struct ip_fw_chain;
void ipfw_bpf_init(int);
void ipfw_bpf_uninit(int);
void ipfw_bpf_tap(u_char *, u_int);
void ipfw_bpf_mtap(struct mbuf *);
void ipfw_bpf_mtap2(void *, u_int, struct mbuf *);
void ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
struct ip_fw_args *args, struct mbuf *m,
u_short offset, uint32_t tablearg, struct ip *ip);
struct ip_fw_args *args, u_short offset, uint32_t tablearg, struct ip *ip);
VNET_DECLARE(u_int64_t, norule_counter);
#define V_norule_counter VNET(norule_counter)
VNET_DECLARE(int, verbose_limit);