FreeBSD previously provided route caching for TCP (and UDP). Re-add
route caching for TCP, with some improvements. In particular, invalidate
the route cache if a new route is added, which might be a better match.
The cache is automatically invalidated if the old route is deleted.

Submitted by:	Mike Karels
Reviewed by:	gnn
Differential Revision:	https://reviews.freebsd.org/D4306
commit 84cc0778d0
parent 1c9b29f95f
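The mechanism is a generation ("cookie") check: every route addition bumps a per-table generation counter, each connection remembers the generation it saw when it cached a route, and a mismatch makes the sender drop its cached route and look it up again (deletion of the cached route itself is already covered by the existing reference counting). The standalone C sketch below is illustrative only and is not FreeBSD source: struct rtable, struct conn_cache, route_add(), and conn_output() are hypothetical stand-ins for the kernel's rnh_gen counter, the inpcb's inp_rt_cookie, route addition in rtrequest1_fib(), and the RT_VALIDATE() macro added in this diff.

/*
 * Illustrative sketch only (not FreeBSD source): a cached pointer is paired
 * with the generation count ("cookie") observed when it was stored.  Adding
 * a route bumps the table generation; a consumer whose cookie no longer
 * matches drops its cache and looks the route up again.
 */
#include <stdio.h>

typedef volatile unsigned int rt_gen_t;

struct rtable {
	rt_gen_t	gen;		/* bumped on every route addition */
	const char	*best_route;	/* stand-in for the radix tree */
};

struct conn_cache {
	const char	*cached_route;	/* stand-in for ro_rt */
	rt_gen_t	cookie;		/* generation seen at cache time */
};

static void
route_add(struct rtable *rt, const char *route)
{
	rt->best_route = route;
	rt->gen++;			/* invalidates all stale cookies */
}

static const char *
conn_output(struct rtable *rt, struct conn_cache *cc)
{
	/* Same idea as RT_VALIDATE(): stale cookie => drop cached route. */
	if (cc->cookie != rt->gen && cc->cached_route != NULL) {
		cc->cached_route = NULL;
		cc->cookie = rt->gen;
	}
	if (cc->cached_route == NULL) {	/* cache miss: do a full lookup */
		cc->cached_route = rt->best_route;
		cc->cookie = rt->gen;
	}
	return (cc->cached_route);
}

int
main(void)
{
	struct rtable rt = { 0, "default via gw0" };
	struct conn_cache cc = { NULL, 0 };

	printf("%s\n", conn_output(&rt, &cc));	/* lookup, then cached */
	printf("%s\n", conn_output(&rt, &cc));
	route_add(&rt, "more specific via gw1");	/* new, better route */
	printf("%s\n", conn_output(&rt, &cc));	/* cache dropped, refreshed */
	return (0);
}

As in the diff itself, the generation counter is read without a lock, so an occasional spurious cache drop is accepted in exchange for avoiding lock overhead on every send.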
@@ -201,6 +201,16 @@ rt_tables_get_rnh(int table, int fam)
 	return (*rt_tables_get_rnh_ptr(table, fam));
 }
 
+rt_gen_t
+rt_tables_get_gen(int table, int fam)
+{
+	struct rib_head *rnh;
+
+	rnh = *rt_tables_get_rnh_ptr(table, fam);
+	return (rnh->rnh_gen);
+}
+
+
 /*
  * route initialization must occur before ip6_init2(), which happenas at
  * SI_ORDER_MIDDLE.
@@ -1754,6 +1764,7 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
 			*ret_nrt = rt;
 			RT_ADDREF(rt);
 		}
+		rnh->rnh_gen++;		/* Routing table updated */
 		RT_UNLOCK(rt);
 		break;
 	case RTM_CHANGE:
@@ -98,6 +98,14 @@ struct rt_metrics {
 /* lle state is exported in rmx_state rt_metrics field */
 #define	rmx_state	rmx_weight
 
+/*
+ * Keep a generation count of routing table, incremented on route addition,
+ * so we can invalidate caches.  This is accessed without a lock, as precision
+ * is not required.
+ */
+typedef volatile u_int rt_gen_t;	/* tree generation (for adds) */
+#define	RT_GEN(fibnum, af)	rt_tables_get_gen(fibnum, af)
+
 #define	RT_DEFAULT_FIB	0	/* Explicitly mark fib=0 restricted cases */
 #define	RT_ALL_FIBS	-1	/* Announce event for every fib */
 #ifdef _KERNEL
@@ -398,6 +406,20 @@ struct rt_addrinfo {
 	}							\
 } while (0)
 
+/*
+ * Validate a cached route based on a supplied cookie.  If there is an
+ * out-of-date cache, simply free it.  Update the generation number
+ * for the new allocation
+ */
+#define	RT_VALIDATE(ro, cookiep, fibnum) do {				\
+	rt_gen_t cookie = RT_GEN(fibnum, (ro)->ro_dst.sa_family);	\
+	if (*(cookiep) != cookie && (ro)->ro_rt != NULL) {		\
+		RTFREE((ro)->ro_rt);					\
+		(ro)->ro_rt = NULL;					\
+		*(cookiep) = cookie;					\
+	}								\
+} while (0)
+
 struct ifmultiaddr;
 struct rib_head;
 
@@ -415,6 +437,7 @@ int rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *);
 void	 rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *);
 struct rib_head *rt_table_init(int);
 void	rt_table_destroy(struct rib_head *);
+rt_gen_t rt_tables_get_gen(int table, int fam);
 
 int	rtsock_addrmsg(int, struct ifaddr *, int);
 int	rtsock_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int);
@@ -41,7 +41,7 @@ struct rib_head {
 	rn_walktree_t	*rnh_walktree;	/* traverse tree */
 	rn_walktree_from_t *rnh_walktree_from; /* traverse tree below a */
 	rn_close_t	*rnh_close;	/*do something when the last ref drops*/
-	u_int		rnh_gen;	/* generation counter */
+	rt_gen_t	rnh_gen;	/* generation counter */
 	int		rnh_multipath;	/* multipath capable ? */
 	struct radix_node rnh_nodes[3];	/* empty tree for common case */
 	struct rwlock	rib_lock;	/* config/data path lock */
@@ -1298,6 +1298,11 @@ in_pcbfree(struct inpcb *inp)
 	if (inp->inp_moptions != NULL)
 		inp_freemoptions(inp->inp_moptions);
 #endif
+	if (inp->inp_route.ro_rt) {
+		RTFREE(inp->inp_route.ro_rt);
+		inp->inp_route.ro_rt = (struct rtentry *)NULL;
+	}
+
 	inp->inp_vflag = 0;
 	inp->inp_flags2 |= INP_FREED;
 	crfree(inp->inp_cred);
@@ -2224,6 +2229,23 @@ in_pcbremlists(struct inpcb *inp)
 #endif
 }
 
+/*
+ * Check for alternatives when higher level complains
+ * about service problems.  For now, invalidate cached
+ * routing information.  If the route was created dynamically
+ * (by a redirect), time to try a default gateway again.
+ */
+void
+in_losing(struct inpcb *inp)
+{
+
+	if (inp->inp_route.ro_rt) {
+		RTFREE(inp->inp_route.ro_rt);
+		inp->inp_route.ro_rt = (struct rtentry *)NULL;
+	}
+	return;
+}
+
 /*
  * A set label operation has occurred at the socket layer, propagate the
  * label change into the in_pcb for the socket.
@@ -42,6 +42,7 @@
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 #include <sys/_rwlock.h>
+#include <net/route.h>
 
 #ifdef _KERNEL
 #include <sys/lock.h>
@@ -238,8 +239,14 @@ struct inpcb {
 #define	inp_zero_size	offsetof(struct inpcb, inp_gencnt)
 	inp_gen_t	inp_gencnt;	/* (c) generation count */
 	struct llentry	*inp_lle;	/* cached L2 information */
-	struct rtentry	*inp_rt;	/* cached L3 information */
 	struct rwlock	inp_lock;
+	rt_gen_t	inp_rt_cookie;	/* generation for route entry */
+	union {				/* cached L3 information */
+		struct route	inpu_route;
+		struct route_in6 inpu_route6;
+	} inp_rtu;
+#define inp_route	inp_rtu.inpu_route
+#define inp_route6	inp_rtu.inpu_route6
 };
 #define	inp_fport	inp_inc.inc_fport
 #define	inp_lport	inp_inc.inc_lport
@@ -722,6 +729,7 @@ void in_pcbrehash_mbuf(struct inpcb *, struct mbuf *);
 int	in_pcbrele(struct inpcb *);
 int	in_pcbrele_rlocked(struct inpcb *);
 int	in_pcbrele_wlocked(struct inpcb *);
+void	in_losing(struct inpcb *);
 void	in_pcbsetsolabel(struct socket *so);
 int	in_getpeeraddr(struct socket *so, struct sockaddr **nam);
 int	in_getsockaddr(struct socket *so, struct sockaddr **nam);
@@ -282,17 +282,36 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
 	gw = dst = (struct sockaddr_in *)&ro->ro_dst;
 	fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m);
 	rte = ro->ro_rt;
-	/*
-	 * The address family should also be checked in case of sharing
-	 * the cache with IPv6.
-	 */
-	if (rte == NULL || dst->sin_family != AF_INET) {
+	if (rte == NULL) {
 		bzero(dst, sizeof(*dst));
 		dst->sin_family = AF_INET;
 		dst->sin_len = sizeof(*dst);
 		dst->sin_addr = ip->ip_dst;
 	}
 again:
+	/*
+	 * Validate route against routing table additions;
+	 * a better/more specific route might have been added.
+	 */
+	if (inp)
+		RT_VALIDATE(ro, &inp->inp_rt_cookie, fibnum);
+	/*
+	 * If there is a cached route,
+	 * check that it is to the same destination
+	 * and is still up.  If not, free it and try again.
+	 * The address family should also be checked in case of sharing the
+	 * cache with IPv6.
+	 * Also check whether routing cache needs invalidation.
+	 */
+	rte = ro->ro_rt;
+	if (rte && ((rte->rt_flags & RTF_UP) == 0 ||
+		    rte->rt_ifp == NULL ||
+		    !RT_LINK_IS_UP(rte->rt_ifp) ||
+		    dst->sin_family != AF_INET ||
+		    dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
+		RTFREE(rte);
+		rte = ro->ro_rt = (struct rtentry *)NULL;
+	}
 	ia = NULL;
 	have_ia_ref = 0;
 	/*
@@ -1379,9 +1379,6 @@ tcp_output(struct tcpcb *tp)
 #endif
 #ifdef INET
     {
-	struct route ro;
-
-	bzero(&ro, sizeof(ro));
 	ip->ip_len = htons(m->m_pkthdr.len);
 #ifdef INET6
 	if (tp->t_inpcb->inp_vflag & INP_IPV6PROTO)
@@ -1412,13 +1409,12 @@ tcp_output(struct tcpcb *tp)
 	tcp_pcap_add(th, m, &(tp->t_outpkts));
 #endif
 
-	error = ip_output(m, tp->t_inpcb->inp_options, &ro,
+	error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route,
 	    ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), 0,
 	    tp->t_inpcb);
 
-	if (error == EMSGSIZE && ro.ro_rt != NULL)
-		mtu = ro.ro_rt->rt_mtu;
-	RO_RTFREE(&ro);
+	if (error == EMSGSIZE && tp->t_inpcb->inp_route.ro_rt != NULL)
+		mtu = tp->t_inpcb->inp_route.ro_rt->rt_mtu;
     }
 #endif /* INET */
 
@@ -1632,6 +1632,10 @@ tcp_notify(struct inpcb *inp, int error)
 	if (tp->t_state == TCPS_ESTABLISHED &&
 	    (error == EHOSTUNREACH || error == ENETUNREACH ||
 	     error == EHOSTDOWN)) {
+		if (inp->inp_route.ro_rt) {
+			RTFREE(inp->inp_route.ro_rt);
+			inp->inp_route.ro_rt = (struct rtentry *)NULL;
+		}
 		return (inp);
 	} else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
 	    tp->t_softerror) {
@@ -1926,11 +1930,11 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
 	else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB ||
 		cmd == PRC_UNREACH_PORT || cmd == PRC_TIMXCEED_INTRANS) && ip)
 		notify = tcp_drop_syn_sent;
-	/*
-	 * Redirects don't need to be handled up here.
-	 */
-	else if (PRC_IS_REDIRECT(cmd))
+	else if (PRC_IS_REDIRECT(cmd)) {
+		/* signal EHOSTDOWN, as it flushes the cached route */
+		in_pcbnotifyall(&tcbinfo, faddr, EHOSTDOWN, notify);
 		return;
+	}
 	/*
 	 * Hostdead is ugly because it goes linearly through all PCBs.
 	 * XXX: We never get this from ICMP, otherwise it makes an
@@ -786,7 +786,9 @@ tcp_timer_rexmt(void * xtp)
 #ifdef INET6
 		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
 			in6_losing(tp->t_inpcb);
+		else
 #endif
+			in_losing(tp->t_inpcb);
 		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
 		tp->t_srtt = 0;
 	}
@@ -740,6 +740,11 @@ udp_notify(struct inpcb *inp, int errno)
 	 * or a write lock, but a read lock is sufficient.
 	 */
 	INP_LOCK_ASSERT(inp);
+	if ((errno == EHOSTUNREACH || errno == ENETUNREACH ||
+	     errno == EHOSTDOWN) && inp->inp_route.ro_rt) {
+		RTFREE(inp->inp_route.ro_rt);
+		inp->inp_route.ro_rt = (struct rtentry *)NULL;
+	}
 
 	inp->inp_socket->so_error = errno;
 	sorwakeup(inp->inp_socket);
@@ -761,11 +766,11 @@ udp_common_ctlinput(int cmd, struct sockaddr *sa, void *vip,
 	if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
 		return;
 
-	/*
-	 * Redirects don't need to be handled up here.
-	 */
-	if (PRC_IS_REDIRECT(cmd))
+	if (PRC_IS_REDIRECT(cmd)) {
+		/* signal EHOSTDOWN, as it flushes the cached route */
+		in_pcbnotifyall(&udbinfo, faddr, EHOSTDOWN, udp_notify);
 		return;
+	}
 
 	/*
 	 * Hostdead is ugly because it goes linearly through all PCBs.
@@ -1116,7 +1121,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
 	int error = 0;
 	int ipflags;
 	u_short fport, lport;
-	int unlock_udbinfo;
+	int unlock_udbinfo, unlock_inp;
 	u_char tos;
 	uint8_t pr;
 	uint16_t cscov = 0;
@@ -1137,7 +1142,15 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
 	}
 
 	src.sin_family = 0;
+	sin = (struct sockaddr_in *)addr;
+	if (sin == NULL ||
+	    (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
+		INP_WLOCK(inp);
+		unlock_inp = UH_WLOCKED;
+	} else {
 		INP_RLOCK(inp);
+		unlock_inp = UH_RLOCKED;
+	}
 	tos = inp->inp_ip_tos;
 	if (control != NULL) {
 		/*
@@ -1145,6 +1158,9 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
 		 * stored in a single mbuf.
 		 */
 		if (control->m_next) {
+			if (unlock_inp == UH_WLOCKED)
+				INP_WUNLOCK(inp);
+			else
 				INP_RUNLOCK(inp);
 			m_freem(control);
 			m_freem(m);
@@ -1220,6 +1236,9 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
 		m_freem(control);
 	}
 	if (error) {
+		if (unlock_inp == UH_WLOCKED)
+			INP_WUNLOCK(inp);
+		else
 			INP_RUNLOCK(inp);
 		m_freem(m);
 		return (error);
@@ -1246,8 +1265,6 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
 	sin = (struct sockaddr_in *)addr;
 	if (sin != NULL &&
 	    (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
-		INP_RUNLOCK(inp);
-		INP_WLOCK(inp);
 		INP_HASH_WLOCK(pcbinfo);
 		unlock_udbinfo = UH_WLOCKED;
 	} else if ((sin != NULL && (
@@ -1514,9 +1531,10 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
 	else if (unlock_udbinfo == UH_RLOCKED)
 		INP_HASH_RUNLOCK(pcbinfo);
 	UDP_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
-	error = ip_output(m, inp->inp_options, NULL, ipflags,
+	error = ip_output(m, inp->inp_options,
+	    (unlock_inp == UH_WLOCKED ? &inp->inp_route : NULL), ipflags,
 	    inp->inp_moptions, inp);
-	if (unlock_udbinfo == UH_WLOCKED)
+	if (unlock_inp == UH_WLOCKED)
 		INP_WUNLOCK(inp);
 	else
 		INP_RUNLOCK(inp);
@@ -827,9 +827,10 @@ void
 in6_losing(struct inpcb *in6p)
 {
 
-	/*
-	 * We don't store route pointers in the routing table anymore
-	 */
+	if (in6p->inp_route6.ro_rt) {
+		RTFREE(in6p->inp_route6.ro_rt);
+		in6p->inp_route6.ro_rt = (struct rtentry *)NULL;
+	}
 	return;
 }
 
@@ -840,9 +841,11 @@ in6_losing(struct inpcb *in6p)
 struct inpcb *
 in6_rtchange(struct inpcb *inp, int errno)
 {
-	/*
-	 * We don't store route pointers in the routing table anymore
-	 */
+
+	if (inp->inp_route6.ro_rt) {
+		RTFREE(inp->inp_route6.ro_rt);
+		inp->inp_route6.ro_rt = (struct rtentry *)NULL;
+	}
 	return inp;
 }
 
@@ -546,7 +546,18 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
 	/* adjust pointer */
 	ip6 = mtod(m, struct ip6_hdr *);
 
-	if (ro->ro_rt && fwd_tag == NULL) {
+	/*
+	 * Validate route against routing table additions;
+	 * a better/more specific route might have been added.
+	 * Make sure address family is set in route.
+	 */
+	if (inp) {
+		ro->ro_dst.sin6_family = AF_INET6;
+		RT_VALIDATE((struct route *)ro, &inp->inp_rt_cookie, fibnum);
+	}
+	if (ro->ro_rt && fwd_tag == NULL && (ro->ro_rt->rt_flags & RTF_UP) &&
+	    ro->ro_dst.sin6_family == AF_INET6 &&
+	    IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &ip6->ip6_dst)) {
 		rt = ro->ro_rt;
 		ifp = ro->ro_rt->rt_ifp;
 	} else {
@@ -939,7 +950,8 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
 			    m->m_pkthdr.len);
 			ifa_free(&ia6->ia_ifa);
 		}
-		error = nd6_output_ifp(ifp, origifp, m, dst, NULL);
+		error = nd6_output_ifp(ifp, origifp, m, dst,
+		    (struct route *)ro);
 		goto done;
 	}
 
@@ -1038,7 +1050,8 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
 			counter_u64_add(ia->ia_ifa.ifa_obytes,
 			    m->m_pkthdr.len);
 		}
-		error = nd6_output_ifp(ifp, origifp, m, dst, NULL);
+		error = nd6_output_ifp(ifp, origifp, m, dst,
+		    (struct route *)ro);
 	} else
 		m_freem(m);
 }
@@ -876,8 +876,8 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
 
 	UDP_PROBE(send, NULL, inp, ip6, inp, udp6);
 	UDPSTAT_INC(udps_opackets);
-	error = ip6_output(m, optp, NULL, flags, inp->in6p_moptions,
-	    NULL, inp);
+	error = ip6_output(m, optp, &inp->inp_route6, flags,
+	    inp->in6p_moptions, NULL, inp);
 	break;
 	case AF_INET:
 		error = EAFNOSUPPORT;