pf: support dummynet

Allow pf to use dummynet pipes and queues.

We re-use the currently unused IPFW_IS_DUMMYNET flag to allow dummynet
to tell us that a packet is being re-injected after being delayed. This
is needed to avoid endlessly looping the packet between pf and dummynet.

MFC after:	2 weeks
Sponsored by:	Rubicon Communications, LLC ("Netgate")
Differential Revision:	https://reviews.freebsd.org/D31904
This commit is contained in:
Kristof Provost 2021-05-15 13:49:22 +02:00
parent 8160a0f62b
commit 63b3c1c770
8 changed files with 256 additions and 4 deletions

View File

@ -480,6 +480,9 @@ pf_nvrule_to_rule(const nvlist_t *nvl, struct pfctl_rule *rule)
nvlist_get_number(nvl, "max_src_conn_rate.seconds");
rule->qid = nvlist_get_number(nvl, "qid");
rule->pqid = nvlist_get_number(nvl, "pqid");
rule->dnpipe = nvlist_get_number(nvl, "dnpipe");
rule->dnrpipe = nvlist_get_number(nvl, "dnrpipe");
rule->free_flags = nvlist_get_number(nvl, "dnflags");
rule->prob = nvlist_get_number(nvl, "prob");
rule->cuid = nvlist_get_number(nvl, "cuid");
rule->cpid = nvlist_get_number(nvl, "cpid");
@ -584,6 +587,9 @@ pfctl_add_rule(int dev, const struct pfctl_rule *r, const char *anchor,
r->max_src_conn_rate.limit);
nvlist_add_number(nvlr, "max_src_conn_rate.seconds",
r->max_src_conn_rate.seconds);
nvlist_add_number(nvlr, "dnpipe", r->dnpipe);
nvlist_add_number(nvlr, "dnrpipe", r->dnrpipe);
nvlist_add_number(nvlr, "dnflags", r->free_flags);
nvlist_add_number(nvlr, "prob", r->prob);
nvlist_add_number(nvlr, "cuid", r->cuid);
nvlist_add_number(nvlr, "cpid", r->cpid);

View File

@ -114,6 +114,9 @@ struct pfctl_rule {
} max_src_conn_rate;
u_int32_t qid;
u_int32_t pqid;
u_int16_t dnpipe;
u_int16_t dnrpipe;
u_int32_t free_flags;
u_int32_t nr;
u_int32_t prob;
uid_t cuid;

View File

@ -248,6 +248,9 @@ static struct filter_opts {
char *tag;
char *match_tag;
u_int8_t match_tag_not;
u_int16_t dnpipe;
u_int16_t dnrpipe;
u_int32_t free_flags;
u_int rtableid;
u_int8_t prio;
u_int8_t set_prio[2];
@ -468,6 +471,7 @@ int parseport(char *, struct range *r, int);
%token BITMASK RANDOM SOURCEHASH ROUNDROBIN STATICPORT PROBABILITY MAPEPORTSET
%token ALTQ CBQ CODEL PRIQ HFSC FAIRQ BANDWIDTH TBRSIZE LINKSHARE REALTIME
%token UPPERLIMIT QUEUE PRIORITY QLIMIT HOGS BUCKETS RTABLE TARGET INTERVAL
%token DNPIPE DNQUEUE
%token LOAD RULESET_OPTIMIZATION PRIO
%token STICKYADDRESS MAXSRCSTATES MAXSRCNODES SOURCETRACK GLOBAL RULE
%token MAXSRCCONN MAXSRCCONNRATE OVERLOAD FLUSH SLOPPY
@ -2464,6 +2468,15 @@ pfrule : action dir logquick interface route af proto fromto
}
#endif
if ($9.dnpipe || $9.dnrpipe) {
r.dnpipe = $9.dnpipe;
r.dnrpipe = $9.dnrpipe;
if ($9.free_flags & PFRULE_DN_IS_PIPE)
r.free_flags |= PFRULE_DN_IS_PIPE;
else
r.free_flags |= PFRULE_DN_IS_QUEUE;
}
expand_rule(&r, $4, $5.host, $7, $8.src_os,
$8.src.host, $8.src.port, $8.dst.host, $8.dst.port,
$9.uid, $9.gid, $9.icmpspec, "");
@ -2565,6 +2578,32 @@ filter_opt : USER uids {
}
filter_opts.queues = $1;
}
| DNPIPE number {
filter_opts.dnpipe = $2;
filter_opts.free_flags |= PFRULE_DN_IS_PIPE;
}
| DNPIPE '(' number ')' {
filter_opts.dnpipe = $3;
filter_opts.free_flags |= PFRULE_DN_IS_PIPE;
}
| DNPIPE '(' number comma number ')' {
filter_opts.dnrpipe = $5;
filter_opts.dnpipe = $3;
filter_opts.free_flags |= PFRULE_DN_IS_PIPE;
}
| DNQUEUE number {
filter_opts.dnpipe = $2;
filter_opts.free_flags |= PFRULE_DN_IS_QUEUE;
}
| DNQUEUE '(' number comma number ')' {
filter_opts.dnrpipe = $5;
filter_opts.dnpipe = $3;
filter_opts.free_flags |= PFRULE_DN_IS_QUEUE;
}
| DNQUEUE '(' number ')' {
filter_opts.dnpipe = $3;
filter_opts.free_flags |= PFRULE_DN_IS_QUEUE;
}
| TAG string {
filter_opts.tag = $2;
}
@ -5592,6 +5631,8 @@ lookup(char *s)
{ "debug", DEBUG},
{ "divert-reply", DIVERTREPLY},
{ "divert-to", DIVERTTO},
{ "dnpipe", DNPIPE},
{ "dnqueue", DNQUEUE},
{ "drop", DROP},
{ "drop-ovl", FRAGDROP},
{ "dup-to", DUPTO},

View File

@ -1018,6 +1018,15 @@ print_rule(struct pfctl_rule *r, const char *anchor_call, int verbose, int numer
i = 0;
while (r->label[i][0])
printf(" label \"%s\"", r->label[i++]);
/* Only dnrpipe as we might do (0, 42) to only queue return traffic. */
if (r->dnrpipe)
printf(" %s(%d, %d)",
r->free_flags & PFRULE_DN_IS_PIPE ? "dnpipe" : "dnqueue",
r->dnpipe, r->dnrpipe);
else if (r->dnpipe)
printf(" %s %d",
r->free_flags & PFRULE_DN_IS_PIPE ? "dnpipe" : "dnqueue",
r->dnpipe);
if (r->qname[0] && r->pqname[0])
printf(" queue(%s, %s)", r->qname, r->pqname);
else if (r->qname[0])

View File

@ -563,6 +563,9 @@ struct pf_kpool {
/*
 * Action values that pf_rule_to_actions() copies out of a rule and that
 * pf_create_state() later stores in the state: queue IDs plus the dummynet
 * pipe/queue numbers and their flags.
 */
struct pf_rule_actions {
uint16_t qid; /* presumably the ALTQ queue ID - TODO confirm */
uint16_t pqid; /* presumably the ALTQ priority queue ID - TODO confirm */
uint16_t dnpipe; /* dummynet pipe/queue number; 0 means none configured */
uint16_t dnrpipe; /* Reverse direction pipe */
uint32_t flags; /* PFRULE_DN_IS_PIPE is set here when the rule names a pipe */
};
union pf_krule_ptr {
@ -608,6 +611,9 @@ struct pf_krule {
} max_src_conn_rate;
u_int16_t qid;
u_int16_t pqid;
u_int16_t dnpipe;
u_int16_t dnrpipe;
u_int32_t free_flags;
u_int32_t nr;
u_int32_t prob;
uid_t cuid;
@ -755,6 +761,8 @@ struct pf_state_cmp {
/* was PFSTATE_PFLOW 0x04 */
#define PFSTATE_NOSYNC 0x08
#define PFSTATE_ACK 0x10
#define PFRULE_DN_IS_PIPE 0x40
#define PFRULE_DN_IS_QUEUE 0x80
#define PFSTATE_SETPRIO 0x0200
#define PFSTATE_SETMASK (PFSTATE_SETPRIO)
@ -858,6 +866,8 @@ struct pf_kstate {
u_int32_t pfsync_time;
u_int16_t qid;
u_int16_t pqid;
u_int16_t dnpipe;
u_int16_t dnrpipe;
u_int16_t tag;
u_int8_t log;
};

View File

@ -841,7 +841,8 @@ tag_mbuf(struct mbuf *m, int dir, struct ip_fw_args *fwa)
m_tag_prepend(m, mtag); /* Attach to mbuf chain. */
dt = (struct dn_pkt_tag *)(mtag + 1);
dt->rule = fwa->rule;
dt->rule.info &= IPFW_ONEPASS; /* only keep this info */
/* only keep this info */
dt->rule.info &= (IPFW_ONEPASS | IPFW_IS_DUMMYNET);
dt->dn_dir = dir;
dt->ifp = fwa->flags & IPFW_ARGS_OUT ? fwa->ifp : NULL;
/* dt->output time is updated as we move through */

View File

@ -94,6 +94,13 @@ __FBSDID("$FreeBSD$");
#include <netinet/udp.h>
#include <netinet/udp_var.h>
/* dummynet */
#include <netinet/ip_dummynet.h>
#include <netinet/ip_fw.h>
#include <netpfil/ipfw/dn_heap.h>
#include <netpfil/ipfw/ip_fw_private.h>
#include <netpfil/ipfw/ip_dn_private.h>
#ifdef INET6
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
@ -3310,6 +3317,12 @@ pf_rule_to_actions(struct pf_krule *r, struct pf_rule_actions *a)
a->qid = r->qid;
if (r->pqid)
a->pqid = r->pqid;
/*
 * Copy the dummynet settings.  A zero pipe number leaves whatever value
 * is already stored in *a untouched.
 */
if (r->dnpipe)
	a->dnpipe = r->dnpipe;
/* The reverse pipe has its own field; it must not overwrite dnpipe. */
if (r->dnrpipe)
	a->dnrpipe = r->dnrpipe;
if (r->free_flags & PFRULE_DN_IS_PIPE)
	a->flags |= PFRULE_DN_IS_PIPE;
}
int
@ -3982,6 +3995,9 @@ pf_create_state(struct pf_krule *r, struct pf_krule *nr, struct pf_krule *a,
s->sync_state = PFSYNC_S_NONE;
s->qid = pd->act.qid;
s->pqid = pd->act.pqid;
s->dnpipe = pd->act.dnpipe;
s->dnrpipe = pd->act.dnrpipe;
s->state_flags |= pd->act.flags;
if (nr != NULL)
s->log |= nr->log & PF_LOG_ALL;
switch (pd->proto) {
@ -6226,6 +6242,64 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t a
return (0);
}
/*
 * Build the ip_fw_args descriptor that is handed to dummynet for a packet.
 *
 * dir     direction the packet travels; PF_IN marks it inbound, any other
 *         value is treated as outbound.
 * pd      packet description: addresses, ports (may be NULL), protocol,
 *         address family and the selected pipe numbers in pd->act.
 * r       rule that matched; supplies the rule direction and the
 *         pipe-vs-queue flag.
 * s       state for the connection, or NULL.  When the rule has no fixed
 *         direction (PF_INOUT) the state's direction is used instead.
 * dnflow  output; zeroed and then filled in.
 *
 * Returns true when dnflow describes a pipe/queue to use, false when no
 * pipe or queue is configured for this packet's direction (dnflow is then
 * only partially filled and must not be used).
 */
static bool
pf_pdesc_to_dnflow(int dir, const struct pf_pdesc *pd,
    const struct pf_krule *r, const struct pf_kstate *s,
    struct ip_fw_args *dnflow)
{
	int dndir = r->direction;
	uint16_t pipe;

	if (s && dndir == PF_INOUT)
		dndir = s->direction;

	memset(dnflow, 0, sizeof(*dnflow));

	/* dummynet wants the flow ports in host byte order. */
	if (pd->dport != NULL)
		dnflow->f_id.dst_port = ntohs(*pd->dport);
	if (pd->sport != NULL)
		dnflow->f_id.src_port = ntohs(*pd->sport);

	if (dir == PF_IN)
		dnflow->flags |= IPFW_ARGS_IN;
	else
		dnflow->flags |= IPFW_ARGS_OUT;

	/*
	 * Packets in the rule/state direction use dnpipe, packets in the
	 * opposite direction use dnrpipe.  Either may be 0 (e.g. "(0, 42)"
	 * shapes return traffic only); in that case there is nothing to do
	 * and we must not send the packet to pipe number 0.
	 */
	if (dir == dndir)
		pipe = pd->act.dnpipe;
	else
		pipe = pd->act.dnrpipe;
	if (pipe == 0)
		return (false);

	dnflow->rule.info = pipe;
	/* Lets pf recognise the packet when dummynet re-injects it. */
	dnflow->rule.info |= IPFW_IS_DUMMYNET;
	if (r->free_flags & PFRULE_DN_IS_PIPE)
		dnflow->rule.info |= IPFW_IS_PIPE;

	dnflow->f_id.proto = pd->proto;
	dnflow->f_id.extra = dnflow->rule.info;
	switch (pd->af) {
	case AF_INET:
		dnflow->f_id.addr_type = 4;
		dnflow->f_id.src_ip = ntohl(pd->src->v4.s_addr);
		dnflow->f_id.dst_ip = ntohl(pd->dst->v4.s_addr);
		break;
	case AF_INET6:
		dnflow->flags |= IPFW_ARGS_IP6;
		dnflow->f_id.addr_type = 6;
		dnflow->f_id.src_ip6 = pd->src->v6;
		dnflow->f_id.dst_ip6 = pd->dst->v6;
		break;
	default:
		panic("Invalid AF");
		break;
	}

	return (true);
}
#ifdef INET
int
pf_test(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp)
@ -6267,10 +6341,11 @@ pf_test(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb *
PF_RULES_RLOCK();
if (__predict_false(ip_divert_ptr != NULL) &&
if ((__predict_false(ip_divert_ptr != NULL) || ip_dn_io_ptr != NULL) &&
((ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) {
struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(ipfwtag+1);
if (rr->info & IPFW_IS_DIVERT && rr->rulenum == 0) {
if ((rr->info & IPFW_IS_DIVERT && rr->rulenum == 0) ||
(rr->info & IPFW_IS_DUMMYNET)) {
if (pd.pf_mtag == NULL &&
((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
action = PF_DROP;
@ -6405,6 +6480,8 @@ pf_test(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb *
log = action != PF_PASS;
goto done;
}
pd.sport = &pd.hdr.udp.uh_sport;
pd.dport = &pd.hdr.udp.uh_dport;
if (pd.hdr.udp.uh_dport == 0 ||
ntohs(pd.hdr.udp.uh_ulen) > m->m_pkthdr.len - off ||
ntohs(pd.hdr.udp.uh_ulen) < sizeof(struct udphdr)) {
@ -6524,6 +6601,47 @@ pf_test(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb *
}
#endif /* ALTQ */
if (s && (s->dnpipe || s->dnrpipe)) {
pd.act.dnpipe = s->dnpipe;
pd.act.dnrpipe = s->dnrpipe;
pd.act.flags = s->state_flags;
} else if (r->dnpipe || r->dnrpipe) {
pd.act.dnpipe = r->dnpipe;
pd.act.dnrpipe = r->dnrpipe;
pd.act.flags = r->free_flags;
}
if ((pd.act.dnpipe || pd.act.dnrpipe) && !PACKET_LOOPED(&pd)) {
if (ip_dn_io_ptr == NULL) {
action = PF_DROP;
REASON_SET(&reason, PFRES_MEMORY);
} else {
struct ip_fw_args dnflow;
if (pd.pf_mtag == NULL &&
((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
action = PF_DROP;
REASON_SET(&reason, PFRES_MEMORY);
if (s)
PF_STATE_UNLOCK(s);
return (action);
}
if (pf_pdesc_to_dnflow(dir, &pd, r, s, &dnflow)) {
ip_dn_io_ptr(m0, &dnflow);
if (*m0 == NULL) {
if (s)
PF_STATE_UNLOCK(s);
return (action);
} else {
/* This is dummynet fast io processing */
m_tag_delete(*m0, m_tag_first(*m0));
pd.pf_mtag->flags &= ~PF_PACKET_LOOPED;
}
}
}
}
/*
* connections redirected to loopback should not match sockets
* bound specifically to loopback due to security implications,
@ -6684,6 +6802,7 @@ pf_test6(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb
u_short action, reason = 0, log = 0;
struct mbuf *m = *m0, *n = NULL;
struct m_tag *mtag;
struct m_tag *ipfwtag;
struct ip6_hdr *h = NULL;
struct pf_krule *a = NULL, *r = &V_pf_default_rule, *tr, *nr;
struct pf_kstate *s = NULL;
@ -6719,7 +6838,19 @@ pf_test6(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb
PF_RULES_RLOCK();
/* We do IP header normalization and packet reassembly here */
if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) {
if (ip_dn_io_ptr != NULL &&
((ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) {
struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(ipfwtag+1);
if (rr->info & IPFW_IS_DUMMYNET) {
if (pd.pf_mtag == NULL &&
((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
action = PF_DROP;
goto done;
}
pd.pf_mtag->flags |= PF_PACKET_LOOPED;
m_tag_delete(m, ipfwtag);
}
} else if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) {
action = PF_DROP;
goto done;
}
@ -6828,6 +6959,8 @@ pf_test6(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb
goto done;
}
pd.p_len = pd.tot_len - off - (pd.hdr.tcp.th_off << 2);
pd.sport = &pd.hdr.tcp.th_sport;
pd.dport = &pd.hdr.tcp.th_dport;
action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
if (action == PF_DROP)
goto done;
@ -6851,6 +6984,8 @@ pf_test6(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb
log = action != PF_PASS;
goto done;
}
pd.sport = &pd.hdr.udp.uh_sport;
pd.dport = &pd.hdr.udp.uh_dport;
if (pd.hdr.udp.uh_dport == 0 ||
ntohs(pd.hdr.udp.uh_ulen) > m->m_pkthdr.len - off ||
ntohs(pd.hdr.udp.uh_ulen) < sizeof(struct udphdr)) {
@ -6974,6 +7109,47 @@ pf_test6(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb
}
#endif /* ALTQ */
if (s && (s->dnpipe || s->dnrpipe)) {
pd.act.dnpipe = s->dnpipe;
pd.act.dnrpipe = s->dnrpipe;
pd.act.flags = s->state_flags;
} else {
pd.act.dnpipe = r->dnpipe;
pd.act.dnrpipe = r->dnrpipe;
pd.act.flags = r->free_flags;
}
if ((pd.act.dnpipe || pd.act.dnrpipe) && !PACKET_LOOPED(&pd)) {
if (ip_dn_io_ptr == NULL) {
action = PF_DROP;
REASON_SET(&reason, PFRES_MEMORY);
} else {
struct ip_fw_args dnflow;
if (pd.pf_mtag == NULL &&
((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
action = PF_DROP;
REASON_SET(&reason, PFRES_MEMORY);
if (s)
PF_STATE_UNLOCK(s);
return (action);
}
if (pf_pdesc_to_dnflow(dir, &pd, r, s, &dnflow)) {
ip_dn_io_ptr(m0, &dnflow);
if (*m0 == NULL) {
if (s)
PF_STATE_UNLOCK(s);
return (action);
} else {
/* This is dummynet fast io processing */
m_tag_delete(*m0, m_tag_first(*m0));
pd.pf_mtag->flags &= ~PF_PACKET_LOOPED;
}
}
}
}
if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
(s->nat_rule.ptr->action == PF_RDR ||

View File

@ -527,6 +527,9 @@ pf_nvrule_to_krule(const nvlist_t *nvl, struct pf_krule *rule)
sizeof(rule->pqname)));
PFNV_CHK(pf_nvstring(nvl, "tagname", rule->tagname,
sizeof(rule->tagname)));
PFNV_CHK(pf_nvuint16_opt(nvl, "dnpipe", &rule->dnpipe, 0));
PFNV_CHK(pf_nvuint16_opt(nvl, "dnrpipe", &rule->dnrpipe, 0));
PFNV_CHK(pf_nvuint32_opt(nvl, "dnflags", &rule->free_flags, 0));
PFNV_CHK(pf_nvstring(nvl, "match_tagname", rule->match_tagname,
sizeof(rule->match_tagname)));
PFNV_CHK(pf_nvstring(nvl, "overload_tblname", rule->overload_tblname,
@ -687,6 +690,9 @@ pf_krule_to_nvrule(struct pf_krule *rule)
nvlist_add_string(nvl, "ifname", rule->ifname);
nvlist_add_string(nvl, "qname", rule->qname);
nvlist_add_string(nvl, "pqname", rule->pqname);
nvlist_add_number(nvl, "dnpipe", rule->dnpipe);
nvlist_add_number(nvl, "dnrpipe", rule->dnrpipe);
nvlist_add_number(nvl, "dnflags", rule->free_flags);
nvlist_add_string(nvl, "tagname", rule->tagname);
nvlist_add_string(nvl, "match_tagname", rule->match_tagname);
nvlist_add_string(nvl, "overload_tblname", rule->overload_tblname);