When ip_output()/ip6_output() is supplied a struct route *ro argument,

it skips the FLOWTABLE lookup. However, a non-NULL ro has a dual meaning
here: it may be supplied to provide a route, or it may be supplied to
store and return to the caller the route that ip_output()/ip6_output()
finds. In the latter case skipping the FLOWTABLE lookup is a pessimisation.

The difference between a struct route filled by FLOWTABLE and one filled
by the rtalloc() family is that the former doesn't hold a reference on
its rtentry. The reference is held by the flow entry, which will
release it at some point in the future. Thus, a route filled by FLOWTABLE
shouldn't be passed to the RTFREE() macro.

- Introduce a new flag for struct route/route_in6 that marks a route
  as not holding a reference on its rtentry.
- Introduce a new macro, RO_RTFREE(), that cleans up a struct route
  depending on its kind.
- All callers of ip_output()/ip6_output() that supply a non-NULL
  but empty route should use RO_RTFREE() to free the result of the
  lookup.
- ip_output()/ip6_output() now always do the FLOWTABLE lookup when
  ro->ro_rt == NULL.

Tested by:	tuexen (SCTP part)
This commit is contained in:
Gleb Smirnoff 2012-07-04 07:37:53 +00:00
parent e3d6ef0b03
commit bf9840512a
7 changed files with 58 additions and 53 deletions

View File

@ -619,6 +619,7 @@ flow_to_route(struct flentry *fle, struct route *ro)
sin->sin_addr.s_addr = hashkey[2]; sin->sin_addr.s_addr = hashkey[2];
ro->ro_rt = __DEVOLATILE(struct rtentry *, fle->f_rt); ro->ro_rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
ro->ro_lle = __DEVOLATILE(struct llentry *, fle->f_lle); ro->ro_lle = __DEVOLATILE(struct llentry *, fle->f_lle);
ro->ro_flags |= RT_NORTREF;
} }
#endif /* INET */ #endif /* INET */
@ -826,7 +827,7 @@ flow_to_route_in6(struct flentry *fle, struct route_in6 *ro)
memcpy(&sin6->sin6_addr, &hashkey[5], sizeof (struct in6_addr)); memcpy(&sin6->sin6_addr, &hashkey[5], sizeof (struct in6_addr));
ro->ro_rt = __DEVOLATILE(struct rtentry *, fle->f_rt); ro->ro_rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
ro->ro_lle = __DEVOLATILE(struct llentry *, fle->f_lle); ro->ro_lle = __DEVOLATILE(struct llentry *, fle->f_lle);
ro->ro_flags |= RT_NORTREF;
} }
#endif /* INET6 */ #endif /* INET6 */

View File

@ -54,7 +54,8 @@ struct route {
struct sockaddr ro_dst; struct sockaddr ro_dst;
}; };
#define RT_CACHING_CONTEXT 0x1 #define RT_CACHING_CONTEXT 0x1 /* XXX: not used anywhere */
#define RT_NORTREF 0x2 /* doesn't hold reference on ro_rt */
/* /*
* These numbers are used by reliable protocols for determining * These numbers are used by reliable protocols for determining
@ -341,6 +342,18 @@ struct rt_addrinfo {
RTFREE_LOCKED(_rt); \ RTFREE_LOCKED(_rt); \
} while (0) } while (0)
/*
 * Release the rtentry cached in a route structure, according to how the
 * route was filled in.
 *
 * _ro is a pointer to a structure with ro_rt and ro_flags members.
 * Nothing is done when ro_rt is NULL.
 *
 * If RT_NORTREF is set, the route does not hold a reference on ro_rt:
 * the flag is cleared and ro_rt is reset to NULL without touching the
 * rtentry.  Otherwise the rtentry is locked with RT_LOCK() and handed to
 * RTFREE_LOCKED().
 *
 * NOTE(review): the referenced branch does not assign NULL to ro_rt
 * itself; presumably RTFREE_LOCKED() resets its argument -- confirm
 * before relying on ro_rt being NULL after this macro.
 * _ro is expanded several times, so it must not have side effects.
 */
#define RO_RTFREE(_ro) do { \
if ((_ro)->ro_rt) { \
if ((_ro)->ro_flags & RT_NORTREF) { \
(_ro)->ro_flags &= ~RT_NORTREF; \
(_ro)->ro_rt = NULL; \
} else { \
RT_LOCK((_ro)->ro_rt); \
RTFREE_LOCKED((_ro)->ro_rt); \
} \
} \
} while (0)
struct radix_node_head *rt_tables_get_rnh(int, int); struct radix_node_head *rt_tables_get_rnh(int, int);
struct ifmultiaddr; struct ifmultiaddr;

View File

@ -1495,8 +1495,7 @@ ip_forward(struct mbuf *m, int srcrt)
if (error == EMSGSIZE && ro.ro_rt) if (error == EMSGSIZE && ro.ro_rt)
mtu = ro.ro_rt->rt_rmx.rmx_mtu; mtu = ro.ro_rt->rt_rmx.rmx_mtu;
if (ro.ro_rt) RO_RTFREE(&ro);
RTFREE(ro.ro_rt);
if (error) if (error)
IPSTAT_INC(ips_cantforward); IPSTAT_INC(ips_cantforward);

View File

@ -105,6 +105,10 @@ extern struct protosw inetsw[];
* ip_len and ip_off are in host format. * ip_len and ip_off are in host format.
* The mbuf chain containing the packet will be freed. * The mbuf chain containing the packet will be freed.
* The mbuf opt, if present, will not be freed. * The mbuf opt, if present, will not be freed.
* If route ro is present and has ro_rt initialized, route lookup would be
* skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL,
* then result of route lookup is stored in ro->ro_rt.
*
* In the IP forwarding case, the packet will arrive with options already * In the IP forwarding case, the packet will arrive with options already
* inserted, so must have a NULL opt pointer. * inserted, so must have a NULL opt pointer.
*/ */
@ -119,7 +123,6 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
int mtu; int mtu;
int n; /* scratchpad */ int n; /* scratchpad */
int error = 0; int error = 0;
int nortfree = 0;
struct sockaddr_in *dst; struct sockaddr_in *dst;
struct in_ifaddr *ia = NULL; struct in_ifaddr *ia = NULL;
int isbroadcast, sw_csum; int isbroadcast, sw_csum;
@ -146,24 +149,23 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
if (ro == NULL) { if (ro == NULL) {
ro = &iproute; ro = &iproute;
bzero(ro, sizeof (*ro)); bzero(ro, sizeof (*ro));
}
#ifdef FLOWTABLE #ifdef FLOWTABLE
{ if (ro->ro_rt == NULL) {
struct flentry *fle; struct flentry *fle;
/* /*
* The flow table returns route entries valid for up to 30 * The flow table returns route entries valid for up to 30
* seconds; we rely on the remainder of ip_output() taking no * seconds; we rely on the remainder of ip_output() taking no
* longer than that long for the stability of ro_rt. The * longer than that long for the stability of ro_rt. The
* flow ID assignment must have happened before this point. * flow ID assignment must have happened before this point.
*/ */
if ((fle = flowtable_lookup_mbuf(V_ip_ft, m, AF_INET)) != NULL) { fle = flowtable_lookup_mbuf(V_ip_ft, m, AF_INET);
flow_to_route(fle, ro); if (fle != NULL)
nortfree = 1; flow_to_route(fle, ro);
}
}
#endif
} }
#endif
if (opt) { if (opt) {
int len = 0; int len = 0;
@ -209,10 +211,9 @@ again:
!RT_LINK_IS_UP(rte->rt_ifp) || !RT_LINK_IS_UP(rte->rt_ifp) ||
dst->sin_family != AF_INET || dst->sin_family != AF_INET ||
dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
if (!nortfree) RO_RTFREE(ro);
RTFREE(rte); ro->ro_lle = NULL;
rte = ro->ro_rt = (struct rtentry *)NULL; rte = NULL;
ro->ro_lle = (struct llentry *)NULL;
} }
#ifdef IPFIREWALL_FORWARD #ifdef IPFIREWALL_FORWARD
if (rte == NULL && fwd_tag == NULL) { if (rte == NULL && fwd_tag == NULL) {
@ -672,9 +673,8 @@ passout:
IPSTAT_INC(ips_fragmented); IPSTAT_INC(ips_fragmented);
done: done:
if (ro == &iproute && ro->ro_rt && !nortfree) { if (ro == &iproute)
RTFREE(ro->ro_rt); RO_RTFREE(ro);
}
if (ia != NULL) if (ia != NULL)
ifa_free(&ia->ia_ifa); ifa_free(&ia->ia_ifa);
return (error); return (error);

View File

@ -4156,10 +4156,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
SCTPDBG(SCTP_DEBUG_OUTPUT3, "IP output returns %d\n", ret); SCTPDBG(SCTP_DEBUG_OUTPUT3, "IP output returns %d\n", ret);
if (net == NULL) { if (net == NULL) {
/* free tempy routes */ /* free tempy routes */
if (ro->ro_rt) { RO_RTFREE(ro);
RTFREE(ro->ro_rt);
ro->ro_rt = NULL;
}
} else { } else {
/* /*
* PMTU check versus smallest asoc MTU goes * PMTU check versus smallest asoc MTU goes
@ -4513,9 +4510,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
} }
if (net == NULL) { if (net == NULL) {
/* Now if we had a temp route free it */ /* Now if we had a temp route free it */
if (ro->ro_rt) { RO_RTFREE(ro);
RTFREE(ro->ro_rt);
}
} else { } else {
/* /*
* PMTU check versus smallest asoc MTU goes * PMTU check versus smallest asoc MTU goes

View File

@ -213,6 +213,9 @@ in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset)
* This function may modify ver and hlim only. * This function may modify ver and hlim only.
* The mbuf chain containing the packet will be freed. * The mbuf chain containing the packet will be freed.
* The mbuf opt, if present, will not be freed. * The mbuf opt, if present, will not be freed.
* If route_in6 ro is present and has ro_rt initialized, route lookup would be
* skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL,
* then result of route lookup is stored in ro->ro_rt.
* *
* type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
* nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one, * nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one,
@ -243,7 +246,6 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
struct in6_addr finaldst, src0, dst0; struct in6_addr finaldst, src0, dst0;
u_int32_t zone; u_int32_t zone;
struct route_in6 *ro_pmtu = NULL; struct route_in6 *ro_pmtu = NULL;
int flevalid = 0;
int hdrsplit = 0; int hdrsplit = 0;
int needipsec = 0; int needipsec = 0;
int sw_csum, tso; int sw_csum, tso;
@ -506,7 +508,7 @@ skip_ipsec2:;
ro = &opt->ip6po_route; ro = &opt->ip6po_route;
dst = (struct sockaddr_in6 *)&ro->ro_dst; dst = (struct sockaddr_in6 *)&ro->ro_dst;
#ifdef FLOWTABLE #ifdef FLOWTABLE
if (ro == &ip6route) { if (ro->ro_rt == NULL) {
struct flentry *fle; struct flentry *fle;
/* /*
@ -515,11 +517,9 @@ skip_ipsec2:;
* longer than that long for the stability of ro_rt. The * longer than that long for the stability of ro_rt. The
* flow ID assignment must have happened before this point. * flow ID assignment must have happened before this point.
*/ */
if ((fle = flowtable_lookup_mbuf(V_ip6_ft, m, AF_INET6)) != NULL) { fle = flowtable_lookup_mbuf(V_ip6_ft, m, AF_INET6);
if (fle != NULL)
flow_to_route_in6(fle, ro); flow_to_route_in6(fle, ro);
if (ro->ro_rt != NULL && ro->ro_lle != NULL)
flevalid = 1;
}
} }
#endif #endif
again: again:
@ -627,7 +627,7 @@ again:
dst_sa.sin6_family = AF_INET6; dst_sa.sin6_family = AF_INET6;
dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_len = sizeof(dst_sa);
dst_sa.sin6_addr = ip6->ip6_dst; dst_sa.sin6_addr = ip6->ip6_dst;
if (flevalid) { if (ro->ro_rt) {
rt = ro->ro_rt; rt = ro->ro_rt;
ifp = ro->ro_rt->rt_ifp; ifp = ro->ro_rt->rt_ifp;
} else if ((error = in6_selectroute_fib(&dst_sa, opt, im6o, ro, } else if ((error = in6_selectroute_fib(&dst_sa, opt, im6o, ro,
@ -1182,13 +1182,10 @@ sendorfree:
V_ip6stat.ip6s_fragmented++; V_ip6stat.ip6s_fragmented++;
done: done:
if (ro == &ip6route && ro->ro_rt && flevalid == 0) { if (ro == &ip6route)
/* brace necessary for RTFREE */ RO_RTFREE(ro);
RTFREE(ro->ro_rt); if (ro_pmtu == &ip6route)
} else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt && RO_RTFREE(ro_pmtu);
((flevalid == 0) || (ro_pmtu != ro))) {
RTFREE(ro_pmtu->ro_rt);
}
#ifdef IPSEC #ifdef IPSEC
if (sp != NULL) if (sp != NULL)
KEY_FREESP(&sp); KEY_FREESP(&sp);

View File

@ -595,9 +595,9 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
icmp6_ifstat_inc(ifp, ifs6_out_neighborsolicit); icmp6_ifstat_inc(ifp, ifs6_out_neighborsolicit);
ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_SOLICIT]); ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_SOLICIT]);
if (ro.ro_rt) { /* we don't cache this route. */ /* We don't cache this route. */
RTFREE(ro.ro_rt); RO_RTFREE(&ro);
}
return; return;
bad: bad:
@ -1124,9 +1124,9 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0,
icmp6_ifstat_inc(ifp, ifs6_out_neighboradvert); icmp6_ifstat_inc(ifp, ifs6_out_neighboradvert);
ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_ADVERT]); ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_ADVERT]);
if (ro.ro_rt) { /* we don't cache this route. */ /* We don't cache this route. */
RTFREE(ro.ro_rt); RO_RTFREE(&ro);
}
return; return;
bad: bad: