Make checks for rt_mtu generic:

Some virtual interface drivers have (ab)used the ifa_rtrequest hook to
ensure the route MTU is not bigger than the interface MTU. While hooking
ifa_rtrequest might be an option in some situations, it is not feasible to
do MTU checks there: generic (or per-domain) routing code is perfectly
capable of doing this.

We currently have 3 places where the MTU is altered:

1) route addition.
 In this case domain overrides radix _addroute callback (in[6]_addroute)
 and all necessary checks/fixes are/can be done there.

2) route change (especially, GW change).
 In this case, there are no explicit per-domain calls, but one can
 override rte by setting ifa_rtrequest hook to domain handler
 (inet6 does this).

3) ifconfig ifaceX mtu YYYY
 In this case, we have no callbacks, but ip[6]_output performs runtime
 checks and decreases rt_mtu if necessary.

Generally, the goals are to be able to handle all MTU changes in
 control plane, not in runtime part, and properly deal with increased
 interface MTU.

This commit changes the following:
* removes hooks setting MTU from drivers side
* adds proper per-domain MTU checks for case 1)
* adds generic MTU check for case 2)

* The latter is done by using the new dom_ifmtu callback, since
 if_mtu denotes the L3 interface MTU, i.e. the maximum transmitted _packet_
 size. However, the IPv6 MTU might differ from if_mtu (e.g. default 1280)
 in some cases, so we need an abstract way to know the maximum MTU size
 for a given interface and domain.
* moves rt_setmetrics() before MTU/ifa_rtrequest hooks since it copies
  user-supplied data which must be checked.
* removes RT_LOCK_ASSERT() from other ifa_rtrequest hooks to be able to
  use these functions on a new, non-inserted rte.

More changes will follow soon.

MFC after:	1 month
Sponsored by:	Yandex LLC
This commit is contained in:
Alexander V. Chernikov 2014-11-06 13:13:09 +00:00
parent 295e858606
commit 1a75e3b20f
14 changed files with 63 additions and 66 deletions

View File

@ -2009,8 +2009,6 @@ link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
struct sockaddr *dst;
struct ifnet *ifp;
RT_LOCK_ASSERT(rt);
if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
return;
@ -3758,6 +3756,19 @@ if_getmtu(if_t ifp)
return ((struct ifnet *)ifp)->if_mtu;
}
/*
 * Return the MTU of the interface as seen by the given address family.
 *
 * The global domain list is walked; the first domain whose dom_family
 * equals 'family' and which supplies a dom_ifmtu callback decides the
 * result.  When no such domain exists, the plain if_mtu is returned.
 */
int
if_getmtu_family(if_t ifp, int family)
{
	struct ifnet *nifp = (struct ifnet *)ifp;
	struct domain *d;

	for (d = domains; d != NULL; d = d->dom_next) {
		/* Skip other families and domains without an MTU callback. */
		if (d->dom_family != family || d->dom_ifmtu == NULL)
			continue;
		return (d->dom_ifmtu(nifp));
	}

	/* No per-domain override: fall back to the interface MTU. */
	return (nifp->if_mtu);
}
int
if_setsoftc(if_t ifp, void *softc)
{

View File

@ -67,7 +67,6 @@ struct disc_softc {
static int discoutput(struct ifnet *, struct mbuf *,
const struct sockaddr *, struct route *);
static void discrtrequest(int, struct rtentry *, struct rt_addrinfo *);
static int discioctl(struct ifnet *, u_long, caddr_t);
static int disc_clone_create(struct if_clone *, int, caddr_t);
static void disc_clone_destroy(struct ifnet *);
@ -198,31 +197,19 @@ discoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
return (0);
}
/* ARGSUSED */
static void
discrtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
{
RT_LOCK_ASSERT(rt);
rt->rt_mtu = DSMTU;
}
/*
* Process an ioctl request.
*/
static int
discioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
struct ifaddr *ifa;
struct ifreq *ifr = (struct ifreq *)data;
int error = 0;
switch (cmd) {
case SIOCSIFADDR:
ifp->if_flags |= IFF_UP;
ifa = (struct ifaddr *)data;
if (ifa != 0)
ifa->ifa_rtrequest = discrtrequest;
/*
* Everything else is done at a higher level.
*/

View File

@ -87,7 +87,6 @@ struct faith_softc {
static int faithioctl(struct ifnet *, u_long, caddr_t);
static int faithoutput(struct ifnet *, struct mbuf *, const struct sockaddr *,
struct route *);
static void faithrtrequest(int, struct rtentry *, struct rt_addrinfo *);
#ifdef INET6
static int faithprefix(struct in6_addr *);
#endif
@ -238,17 +237,6 @@ faithoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
return (0);
}
/* ARGSUSED */
static void
faithrtrequest(cmd, rt, info)
int cmd;
struct rtentry *rt;
struct rt_addrinfo *info;
{
RT_LOCK_ASSERT(rt);
rt->rt_mtu = rt->rt_ifp->if_mtu;
}
/*
* Process an ioctl request.
*/
@ -259,7 +247,6 @@ faithioctl(ifp, cmd, data)
u_long cmd;
caddr_t data;
{
struct ifaddr *ifa;
struct ifreq *ifr = (struct ifreq *)data;
int error = 0;
@ -268,8 +255,7 @@ faithioctl(ifp, cmd, data)
case SIOCSIFADDR:
ifp->if_flags |= IFF_UP;
ifp->if_drv_flags |= IFF_DRV_RUNNING;
ifa = (struct ifaddr *)data;
ifa->ifa_rtrequest = faithrtrequest;
/*
* Everything else is done at a higher level.
*/

View File

@ -88,7 +88,6 @@
CSUM_SCTP_VALID)
int loioctl(struct ifnet *, u_long, caddr_t);
static void lortrequest(int, struct rtentry *, struct rt_addrinfo *);
int looutput(struct ifnet *ifp, struct mbuf *m,
const struct sockaddr *dst, struct route *ro);
static int lo_clone_create(struct if_clone *, int, caddr_t);
@ -364,15 +363,6 @@ if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen)
return (0);
}
/* ARGSUSED */
static void
lortrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
{
RT_LOCK_ASSERT(rt);
rt->rt_mtu = rt->rt_ifp->if_mtu;
}
/*
* Process an ioctl request.
*/
@ -380,7 +370,6 @@ lortrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
int
loioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
struct ifaddr *ifa;
struct ifreq *ifr = (struct ifreq *)data;
int error = 0, mask;
@ -388,8 +377,6 @@ loioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
case SIOCSIFADDR:
ifp->if_flags |= IFF_UP;
ifp->if_drv_flags |= IFF_DRV_RUNNING;
ifa = (struct ifaddr *)data;
ifa->ifa_rtrequest = lortrequest;
/*
* Everything else is done at a higher level.
*/

View File

@ -179,7 +179,6 @@ static int stf_checkaddr4(struct stf_softc *, struct in_addr *,
struct ifnet *);
static int stf_checkaddr6(struct stf_softc *, struct in6_addr *,
struct ifnet *);
static void stf_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
static int stf_ioctl(struct ifnet *, u_long, caddr_t);
static int stf_clone_match(struct if_clone *, const char *);
@ -722,17 +721,6 @@ in_stf_input(struct mbuf **mp, int *offp, int proto)
return (IPPROTO_DONE);
}
/* ARGSUSED */
static void
stf_rtrequest(cmd, rt, info)
int cmd;
struct rtentry *rt;
struct rt_addrinfo *info;
{
RT_LOCK_ASSERT(rt);
rt->rt_mtu = rt->rt_ifp->if_mtu;
}
static int
stf_ioctl(ifp, cmd, data)
struct ifnet *ifp;
@ -764,7 +752,6 @@ stf_ioctl(ifp, cmd, data)
break;
}
ifa->ifa_rtrequest = stf_rtrequest;
ifp->if_flags |= IFF_UP;
break;

View File

@ -556,6 +556,7 @@ void *if_getsoftc(if_t ifp);
int if_setflags(if_t ifp, int flags);
int if_setmtu(if_t ifp, int mtu);
int if_getmtu(if_t ifp);
int if_getmtu_family(if_t ifp, int family);
int if_setflagbits(if_t ifp, int set, int clear);
int if_getflags(if_t ifp);
int if_sendq_empty(if_t ifp);

View File

@ -1279,6 +1279,8 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
rt->rt_ifp = ifa->ifa_ifp;
rt->rt_weight = 1;
rt_setmetrics(info, rt);
#ifdef RADIX_MPATH
/* do not permit exactly the same dst/mask/gw pair */
if (rn_mpath_capable(rnh) &&
@ -1373,8 +1375,6 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
if (ifa->ifa_rtrequest)
ifa->ifa_rtrequest(req, rt, info);
rt_setmetrics(info, rt);
/*
* actually return a resultant rtentry and
* give the caller a single reference.
@ -1412,6 +1412,7 @@ rtrequest1_fib_change(struct radix_node_head *rnh, struct rt_addrinfo *info,
struct rtentry *rt = NULL;
int error = 0;
int free_ifa = 0;
int family, mtu;
rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
info->rti_info[RTAX_NETMASK], rnh);
@ -1433,6 +1434,8 @@ rtrequest1_fib_change(struct radix_node_head *rnh, struct rt_addrinfo *info,
RT_LOCK(rt);
rt_setmetrics(info, rt);
/*
* New gateway could require new ifaddr, ifp;
* flags may also be different; ifp may be specified
@ -1480,7 +1483,13 @@ rtrequest1_fib_change(struct radix_node_head *rnh, struct rt_addrinfo *info,
if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest != NULL)
rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, info);
rt_setmetrics(info, rt);
/* Ensure route MTU is not bigger than interface MTU */
if (rt->rt_ifp != NULL) {
family = info->rti_info[RTAX_DST]->sa_family;
mtu = if_getmtu_family(rt->rt_ifp, family);
if (rt->rt_mtu > mtu)
rt->rt_mtu = mtu;
}
if (ret_nrt) {
*ret_nrt = rt;

View File

@ -94,8 +94,18 @@ in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
rt->rt_flags |= RTF_MULTICAST;
if (rt->rt_mtu == 0 && rt->rt_ifp != NULL)
rt->rt_mtu = rt->rt_ifp->if_mtu;
if (rt->rt_ifp != NULL) {
/*
* Check route MTU:
* inherit interface MTU if not set or
* check if MTU is too large.
*/
if (rt->rt_mtu == 0) {
rt->rt_mtu = rt->rt_ifp->if_mtu;
} else if (rt->rt_mtu > rt->rt_ifp->if_mtu)
rt->rt_mtu = rt->rt_ifp->if_mtu;
}
return (rn_addroute(v_arg, n_arg, head, treenodes));
}

View File

@ -2367,6 +2367,13 @@ in6_domifattach(struct ifnet *ifp)
return ext;
}
/*
 * dom_ifmtu callback for the inet6 domain: report the value of
 * IN6_LINKMTU() for the given interface.
 */
int
in6_domifmtu(struct ifnet *ifp)
{
	int linkmtu;

	linkmtu = IN6_LINKMTU(ifp);
	return (linkmtu);
}
void
in6_domifdetach(struct ifnet *ifp, void *aux)
{

View File

@ -382,7 +382,8 @@ struct domain inet6domain = {
.dom_rtoffset = offsetof(struct sockaddr_in6, sin6_addr) << 3,
.dom_maxrtkey = sizeof(struct sockaddr_in6),
.dom_ifattach = in6_domifattach,
.dom_ifdetach = in6_domifdetach
.dom_ifdetach = in6_domifdetach,
.dom_ifmtu = in6_domifmtu
};
VNET_DOMAIN_SET(inet6);

View File

@ -136,8 +136,18 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
}
}
if (!rt->rt_mtu && rt->rt_ifp)
rt->rt_mtu = IN6_LINKMTU(rt->rt_ifp);
if (rt->rt_ifp != NULL) {
/*
* Check route MTU:
* inherit interface MTU if not set or
* check if MTU is too large.
*/
if (rt->rt_mtu == 0) {
rt->rt_mtu = IN6_LINKMTU(rt->rt_ifp);
} else if (rt->rt_mtu > IN6_LINKMTU(rt->rt_ifp))
rt->rt_mtu = IN6_LINKMTU(rt->rt_ifp);
}
ret = rn_addroute(v_arg, n_arg, head, treenodes);
if (ret == NULL) {

View File

@ -807,6 +807,7 @@ int in6if_do_dad(struct ifnet *);
void in6_savemkludge(struct in6_ifaddr *);
void *in6_domifattach(struct ifnet *);
void in6_domifdetach(struct ifnet *, void *);
int in6_domifmtu(struct ifnet *);
void in6_setmaxmtu(void);
int in6_if2idlen(struct ifnet *);
struct in6_ifaddr *in6ifa_ifpforlinklocal(struct ifnet *, int);

View File

@ -1188,7 +1188,6 @@ nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info)
struct nd_defrouter *dr;
struct ifnet *ifp;
RT_LOCK_ASSERT(rt);
gateway = (struct sockaddr_in6 *)rt->rt_gateway;
ifp = rt->rt_ifp;

View File

@ -64,6 +64,7 @@ struct domain {
int dom_maxrtkey; /* for routing layer */
void *(*dom_ifattach)(struct ifnet *);
void (*dom_ifdetach)(struct ifnet *, void *);
int (*dom_ifmtu)(struct ifnet *);
/* af-dependent data on ifnet */
};