routing: Fix crashes with dpdk_lpm[46] algo.

When a prefix gets deleted from the RIB, dpdk_lpm algo needs to know
 the nexthop of the "parent" prefix to update its internal state.
The glue code, which utilises RIB as a backing route store, uses
 fib[46]_lookup_rt() for the prefix destination after its deletion
 to fetch the desired nexthop.
This approach does not work when deleting less-specific prefixes
 with most-specific ones are still present. For example, if
 10.0.0.0/24, 10.0.0.0/23 and 10.0.0.0/22 exist in RIB, deleting
 10.0.0.0/23 would result in 10.0.0.0/24 being returned as a search
 result instead of 10.0.0.0/22. This, in turn, results in the failed
 datastructure update: part of the deleted /23 prefix will still
 contain the reference to an old nexthop. This leads to the
 use-after-free behaviour, ending with the eventual crashes.

Fix the logic flaw by properly fetching the prefix "parent" via
 newly-created rt_get_inet[6]_parent() helpers.

Differential Revision: https://reviews.freebsd.org/D31546
PR:	256882,256833
MFC after:	1 week
This commit is contained in:
Alexander V. Chernikov 2021-08-15 22:25:21 +00:00
parent 5baf32c97a
commit 36e15b717e
6 changed files with 208 additions and 34 deletions

View File

@ -134,26 +134,27 @@ handle_default_change(struct dpdk_lpm_data *dd, struct rib_cmd_info *rc)
}
static void
get_parent_rule(struct dpdk_lpm_data *dd, struct in_addr addr, uint8_t *plen, uint32_t *nhop_idx)
get_parent_rule(struct dpdk_lpm_data *dd, struct in_addr addr, int plen,
uint8_t *pplen, uint32_t *nhop_idx)
{
struct route_nhop_data rnd;
struct rtentry *rt;
rt = fib4_lookup_rt(dd->fibnum, addr, 0, NHR_UNLOCKED, &rnd);
rt = rt_get_inet_parent(dd->fibnum, addr, plen);
if (rt != NULL) {
struct in_addr addr4;
uint32_t scopeid;
int inet_plen;
rt_get_inet_prefix_plen(rt, &addr4, &inet_plen, &scopeid);
if (inet_plen > 0) {
*plen = inet_plen;
*nhop_idx = fib_get_nhop_idx(dd->fd, rnd.rnd_nhop);
int parent_plen;
rt_get_inet_prefix_plen(rt, &addr4, &parent_plen, &scopeid);
if (parent_plen > 0) {
*pplen = parent_plen;
*nhop_idx = fib_get_nhop_idx(dd->fd, rt_get_raw_nhop(rt));
return;
}
}
*nhop_idx = 0;
*plen = 0;
*pplen = 0;
}
static enum flm_op_result
@ -181,20 +182,23 @@ handle_gu_change(struct dpdk_lpm_data *dd, const struct rib_cmd_info *rc,
}
ret = rte_lpm_add(dd->lpm, ip, plen, nhidx);
FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop %u = %d",
FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop %u -> %u ret: %d",
(rc->rc_cmd == RTM_ADD) ? "ADD" : "UPDATE",
abuf, plen, nhidx, ret);
abuf, plen,
rc->rc_nh_old != NULL ? fib_get_nhop_idx(dd->fd, rc->rc_nh_old) : 0,
nhidx, ret);
} else {
/*
* Need to lookup parent. Assume deletion happened already
*/
uint8_t parent_plen;
uint32_t parent_nhop_idx;
get_parent_rule(dd, addr, &parent_plen, &parent_nhop_idx);
get_parent_rule(dd, addr, plen, &parent_plen, &parent_nhop_idx);
ret = rte_lpm_delete(dd->lpm, ip, plen, parent_plen, parent_nhop_idx);
FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK: %s %s/%d nhop %u = %d",
"DEL", abuf, plen, nhidx, ret);
FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK: %s %s/%d -> /%d nhop %u -> %u ret: %d",
"DEL", abuf, plen, parent_plen, fib_get_nhop_idx(dd->fd, rc->rc_nh_old),
parent_nhop_idx, ret);
}
if (ret != 0) {

View File

@ -165,30 +165,26 @@ handle_ll_change(struct dpdk_lpm6_data *dd, struct rib_cmd_info *rc,
}
static struct rte_lpm6_rule *
pack_parent_rule(struct dpdk_lpm6_data *dd, const struct in6_addr *addr6,
char *buffer)
pack_parent_rule(struct dpdk_lpm6_data *dd, const struct in6_addr *addr6, int plen,
int *pplen, uint32_t *pnhop_idx, char *buffer)
{
struct rte_lpm6_rule *lsp_rule = NULL;
struct route_nhop_data rnd;
struct rtentry *rt;
int plen;
rt = fib6_lookup_rt(dd->fibnum, addr6, 0, NHR_UNLOCKED, &rnd);
*pnhop_idx = 0;
*pplen = 0;
rt = rt_get_inet6_parent(dd->fibnum, addr6, plen);
/* plen = 0 means default route and it's out of scope */
if (rt != NULL) {
uint32_t scopeid;
uint32_t nhop_idx, scopeid;
struct in6_addr new_addr6;
rt_get_inet6_prefix_plen(rt, &new_addr6, &plen, &scopeid);
if (plen > 0) {
uint32_t nhidx = fib_get_nhop_idx(dd->fd, rnd.rnd_nhop);
if (nhidx == 0) {
/*
* shouldn't happen as we already have parent route.
* It will trigger rebuild automatically.
*/
return (NULL);
}
lsp_rule = fill_rule6(buffer, (uint8_t *)&new_addr6, plen, nhidx);
nhop_idx = fib_get_nhop_idx(dd->fd, rt_get_raw_nhop(rt));
lsp_rule = fill_rule6(buffer, (uint8_t *)&new_addr6, plen, nhop_idx);
*pnhop_idx = nhop_idx;
*pplen = plen;
}
}
@ -217,20 +213,26 @@ handle_gu_change(struct dpdk_lpm6_data *dd, const struct rib_cmd_info *rc,
ret = rte_lpm6_add(dd->lpm6, (const uint8_t *)addr6,
plen, nhidx, (rc->rc_cmd == RTM_ADD) ? 1 : 0);
FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop %u = %d",
FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop %u -> %u ret: %d",
(rc->rc_cmd == RTM_ADD) ? "ADD" : "UPDATE",
abuf, plen, nhidx, ret);
abuf, plen,
rc->rc_nh_old != NULL ? fib_get_nhop_idx(dd->fd, rc->rc_nh_old) : 0,
nhidx, ret);
} else {
/*
* Need to lookup parent. Assume deletion happened already
*/
char buffer[RTE_LPM6_RULE_SIZE];
struct rte_lpm6_rule *lsp_rule = NULL;
lsp_rule = pack_parent_rule(dd, addr6, buffer);
int parent_plen;
uint32_t parent_nhop_idx;
lsp_rule = pack_parent_rule(dd, addr6, plen, &parent_plen,
&parent_nhop_idx, buffer);
ret = rte_lpm6_delete(dd->lpm6, (const uint8_t *)addr6, plen, lsp_rule);
FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop ? = %d",
"DEL", abuf, plen, ret);
FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d -> /%d nhop %u -> %u ret: %d",
"DEL", abuf, plen, parent_plen, fib_get_nhop_idx(dd->fd, rc->rc_nh_old),
parent_nhop_idx, ret);
}
if (ret != 0) {

View File

@ -371,6 +371,20 @@ rn_match(void *v_arg, struct radix_head *head)
return (0);
}
/*
* Returns the next (wider) prefix for the key defined by @rn
* if exists.
*/
struct radix_node *
rn_nextprefix(struct radix_node *rn)
{
for (rn = rn->rn_dupedkey; rn != NULL; rn = rn->rn_dupedkey) {
if (!(rn->rn_flags & RNF_ROOT))
return (rn);
}
return (NULL);
}
#ifdef RN_DEBUG
int rn_nodenum;
struct radix_node *rn_clist;

View File

@ -119,6 +119,7 @@ typedef int rn_walktree_t(struct radix_head *head, walktree_f_t *f,
typedef int rn_walktree_from_t(struct radix_head *head,
void *a, void *m, walktree_f_t *f, void *w);
typedef void rn_close_t(struct radix_node *rn, struct radix_head *head);
struct radix_node *rn_nextprefix(struct radix_node *rn);
struct radix_mask_head;

View File

@ -117,6 +117,7 @@ void rt_get_inet_prefix_plen(const struct rtentry *rt, struct in_addr *paddr,
int *plen, uint32_t *pscopeid);
void rt_get_inet_prefix_pmask(const struct rtentry *rt, struct in_addr *paddr,
struct in_addr *pmask, uint32_t *pscopeid);
struct rtentry *rt_get_inet_parent(uint32_t fibnum, struct in_addr addr, int plen);
#endif
#ifdef INET6
struct in6_addr;
@ -124,6 +125,8 @@ void rt_get_inet6_prefix_plen(const struct rtentry *rt, struct in6_addr *paddr,
int *plen, uint32_t *pscopeid);
void rt_get_inet6_prefix_pmask(const struct rtentry *rt, struct in6_addr *paddr,
struct in6_addr *pmask, uint32_t *pscopeid);
struct rtentry *rt_get_inet6_parent(uint32_t fibnum, const struct in6_addr *paddr,
int plen);
#endif
/* Nexthops */

View File

@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$");
#endif
#ifdef INET6
#include <netinet6/in6_fib.h>
#include <netinet6/in6_var.h>
#endif
#include <net/vnet.h>
@ -415,3 +416,152 @@ rib_decompose_notification(struct rib_cmd_info *rc, route_notification_t *cb,
}
}
#endif
#ifdef INET
/*
* Checks if the found key in the trie contains (<=) a prefix covering
* @paddr/@plen.
* Returns the most specific rtentry matching the condition or NULL.
*/
static struct rtentry *
get_inet_parent_prefix(uint32_t fibnum, struct in_addr addr, int plen)
{
struct route_nhop_data rnd;
struct rtentry *rt;
struct in_addr addr4;
uint32_t scopeid;
int parent_plen;
struct radix_node *rn;
rt = fib4_lookup_rt(fibnum, addr, 0, NHR_UNLOCKED, &rnd);
rt_get_inet_prefix_plen(rt, &addr4, &parent_plen, &scopeid);
if (parent_plen <= plen)
return (rt);
/*
* There can be multiple prefixes associated with the found key:
* 10.0.0.0 -> 10.0.0.0/24, 10.0.0.0/23, 10.0.0.0/22, etc.
* All such prefixes are linked via rn_dupedkey, from most specific
* to least specific. Iterate over them to check if any of these
* prefixes are wider than desired plen.
*/
rn = (struct radix_node *)rt;
while ((rn = rn_nextprefix(rn)) != NULL) {
rt = RNTORT(rn);
rt_get_inet_prefix_plen(rt, &addr4, &parent_plen, &scopeid);
if (parent_plen <= plen)
return (rt);
}
return (NULL);
}
/*
* Returns the most specific prefix containing (>) @paddr/plen.
*/
struct rtentry *
rt_get_inet_parent(uint32_t fibnum, struct in_addr addr, int plen)
{
struct in_addr lookup_addr = { .s_addr = INADDR_BROADCAST };
struct in_addr addr4 = addr;
struct in_addr mask4;
struct rtentry *rt;
while (plen-- > 0) {
/* Calculate wider mask & new key to lookup */
mask4.s_addr = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0);
addr4.s_addr = htonl(ntohl(addr4.s_addr) & ntohl(mask4.s_addr));
if (addr4.s_addr == lookup_addr.s_addr) {
/* Skip lookup if the key is the same */
continue;
}
lookup_addr = addr4;
rt = get_inet_parent_prefix(fibnum, lookup_addr, plen);
if (rt != NULL)
return (rt);
}
return (NULL);
}
#endif
#ifdef INET6
/*
* Checks if the found key in the trie contains (<=) a prefix covering
* @paddr/@plen.
* Returns the most specific rtentry matching the condition or NULL.
*/
static struct rtentry *
get_inet6_parent_prefix(uint32_t fibnum, const struct in6_addr *paddr, int plen)
{
struct route_nhop_data rnd;
struct rtentry *rt;
struct in6_addr addr6;
uint32_t scopeid;
int parent_plen;
struct radix_node *rn;
rt = fib6_lookup_rt(fibnum, paddr, 0, NHR_UNLOCKED, &rnd);
rt_get_inet6_prefix_plen(rt, &addr6, &parent_plen, &scopeid);
if (parent_plen <= plen)
return (rt);
/*
* There can be multiple prefixes associated with the found key:
* 2001:db8:1::/64 -> 2001:db8:1::/56, 2001:db8:1::/48, etc.
* All such prefixes are linked via rn_dupedkey, from most specific
* to least specific. Iterate over them to check if any of these
* prefixes are wider than desired plen.
*/
rn = (struct radix_node *)rt;
while ((rn = rn_nextprefix(rn)) != NULL) {
rt = RNTORT(rn);
rt_get_inet6_prefix_plen(rt, &addr6, &parent_plen, &scopeid);
if (parent_plen <= plen)
return (rt);
}
return (NULL);
}
static void
ipv6_writemask(struct in6_addr *addr6, uint8_t mask)
{
uint32_t *cp;
for (cp = (uint32_t *)addr6; mask >= 32; mask -= 32)
*cp++ = 0xFFFFFFFF;
if (mask > 0)
*cp = htonl(mask ? ~((1 << (32 - mask)) - 1) : 0);
}
/*
* Returns the most specific prefix containing (>) @paddr/plen.
*/
struct rtentry *
rt_get_inet6_parent(uint32_t fibnum, const struct in6_addr *paddr, int plen)
{
struct in6_addr lookup_addr = in6mask128;
struct in6_addr addr6 = *paddr;
struct in6_addr mask6;
struct rtentry *rt;
while (plen-- > 0) {
/* Calculate wider mask & new key to lookup */
ipv6_writemask(&mask6, plen);
IN6_MASK_ADDR(&addr6, &mask6);
if (IN6_ARE_ADDR_EQUAL(&addr6, &lookup_addr)) {
/* Skip lookup if the key is the same */
continue;
}
lookup_addr = addr6;
rt = get_inet6_parent_prefix(fibnum, &lookup_addr, plen);
if (rt != NULL)
return (rt);
}
return (NULL);
}
#endif