From 1bfb4986099befab26dc0c1e40e47e89f92f62fb Mon Sep 17 00:00:00 2001 From: ume Date: Mon, 20 Oct 2003 15:27:48 +0000 Subject: [PATCH] correct linkmtu handling. Obtained from: KAME --- sys/netinet/tcp_input.c | 13 ++++- sys/netinet/tcp_reass.c | 13 ++++- sys/netinet6/in6.c | 7 ++- sys/netinet6/in6_rmx.c | 3 +- sys/netinet6/ip6_forward.c | 4 +- sys/netinet6/ip6_mroute.c | 7 ++- sys/netinet6/ip6_output.c | 113 ++++++++++++++++++++++--------------- sys/netinet6/nd6.c | 55 ++++++++---------- sys/netinet6/nd6.h | 1 - sys/netinet6/nd6_rtr.c | 35 +++++------- 10 files changed, 141 insertions(+), 110 deletions(-) diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 9d8d7335df18..a72906a4b9c2 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -2711,12 +2711,16 @@ tcp_mss(tp, offer) if (rt->rt_rmx.rmx_mtu) mss = rt->rt_rmx.rmx_mtu - min_protoh; else { +#ifdef INET6 + mss = (isipv6 ? IN6_LINKMTU(rt->rt_ifp) : ifp->if_mtu) + - min_protoh; +#else + mss = ifp->if_mtu - min_protoh; +#endif if (isipv6) { - mss = ND_IFINFO(rt->rt_ifp)->linkmtu - min_protoh; if (!in6_localaddr(&inp->in6p_faddr)) mss = min(mss, tcp_v6mssdflt); } else { - mss = ifp->if_mtu - min_protoh; if (!in_localaddr(inp->inp_faddr)) mss = min(mss, tcp_mssdflt); } @@ -2834,7 +2838,12 @@ tcp_mssopt(tp) if (rt == NULL) return (isipv6 ? tcp_v6mssdflt : tcp_mssdflt); +#ifdef INET6 + return ((isipv6 ? IN6_LINKMTU(rt->rt_ifp) : + rt->rt_ifp->if_mtu) - min_protoh); +#else return (rt->rt_ifp->if_mtu - min_protoh); +#endif } diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c index 9d8d7335df18..a72906a4b9c2 100644 --- a/sys/netinet/tcp_reass.c +++ b/sys/netinet/tcp_reass.c @@ -2711,12 +2711,16 @@ tcp_mss(tp, offer) if (rt->rt_rmx.rmx_mtu) mss = rt->rt_rmx.rmx_mtu - min_protoh; else { +#ifdef INET6 + mss = (isipv6 ?
IN6_LINKMTU(rt->rt_ifp) : ifp->if_mtu) + - min_protoh; +#else + mss = ifp->if_mtu - min_protoh; +#endif if (isipv6) { - mss = ND_IFINFO(rt->rt_ifp)->linkmtu - min_protoh; if (!in6_localaddr(&inp->in6p_faddr)) mss = min(mss, tcp_v6mssdflt); } else { - mss = ifp->if_mtu - min_protoh; if (!in_localaddr(inp->inp_faddr)) mss = min(mss, tcp_mssdflt); } @@ -2834,7 +2838,12 @@ tcp_mssopt(tp) if (rt == NULL) return (isipv6 ? tcp_v6mssdflt : tcp_mssdflt); +#ifdef INET6 + return ((isipv6 ? IN6_LINKMTU(rt->rt_ifp) : + rt->rt_ifp->if_mtu) - min_protoh); +#else return (rt->rt_ifp->if_mtu - min_protoh); +#endif } diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index d1c5f8cd0c76..83b59137fb6a 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -2288,9 +2288,12 @@ in6_setmaxmtu() IFNET_RLOCK(); for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list)) { + /* this function can be called during ifnet initialization */ + if (!ifp->if_afdata[AF_INET6]) + continue; if ((ifp->if_flags & IFF_LOOPBACK) == 0 && - ND_IFINFO(ifp)->linkmtu > maxmtu) - maxmtu = ND_IFINFO(ifp)->linkmtu; + IN6_LINKMTU(ifp) > maxmtu) + maxmtu = IN6_LINKMTU(ifp); } IFNET_RUNLOCK(); if (maxmtu) /* update only when maxmtu is positive */ diff --git a/sys/netinet6/in6_rmx.c b/sys/netinet6/in6_rmx.c index cc1ba310d395..2853fe89e798 100644 --- a/sys/netinet6/in6_rmx.c +++ b/sys/netinet6/in6_rmx.c @@ -94,6 +94,7 @@ #include #include +#include #include #include @@ -149,7 +150,7 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU) && rt->rt_ifp) - rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; + rt->rt_rmx.rmx_mtu = IN6_LINKMTU(rt->rt_ifp); ret = rn_addroute(v_arg, n_arg, head, treenodes); if (ret == NULL && rt->rt_flags & RTF_HOST) { diff --git a/sys/netinet6/ip6_forward.c b/sys/netinet6/ip6_forward.c index 6414ab575ef2..3cc812eea032 100644 --- a/sys/netinet6/ip6_forward.c +++ b/sys/netinet6/ip6_forward.c @@ -383,7 +383,7 @@
ip6_forward(m, srcrt) return; } - if (m->m_pkthdr.len > rt->rt_ifp->if_mtu) { + if (m->m_pkthdr.len > IN6_LINKMTU(rt->rt_ifp)) { in6_ifstat_inc(rt->rt_ifp, ifs6_in_toobig); if (mcopy) { u_long mtu; @@ -393,7 +393,7 @@ ip6_forward(m, srcrt) size_t ipsechdrsiz; #endif - mtu = rt->rt_ifp->if_mtu; + mtu = IN6_LINKMTU(rt->rt_ifp); #ifdef IPSEC /* * When we do IPsec tunnel ingress, we need to play diff --git a/sys/netinet6/ip6_mroute.c b/sys/netinet6/ip6_mroute.c index eeb9b21710a9..607a2ea92034 100644 --- a/sys/netinet6/ip6_mroute.c +++ b/sys/netinet6/ip6_mroute.c @@ -113,6 +113,7 @@ #include #include +#include #include #include #include @@ -1454,6 +1455,7 @@ phyint_send(ip6, mifp, m) static struct route_in6 ro; struct in6_multi *in6m; struct sockaddr_in6 *dst6; + u_long linkmtu; /* * Make a new reference to the packet; make sure that @@ -1513,7 +1515,8 @@ phyint_send(ip6, mifp, m) * Put the packet into the sending queue of the outgoing interface * if it would fit in the MTU of the interface. 
*/ - if (mb_copy->m_pkthdr.len <= ifp->if_mtu || ifp->if_mtu < IPV6_MMTU) { + linkmtu = IN6_LINKMTU(ifp); + if (mb_copy->m_pkthdr.len <= linkmtu || linkmtu < IPV6_MMTU) { dst6->sin6_len = sizeof(struct sockaddr_in6); dst6->sin6_family = AF_INET6; dst6->sin6_addr = ip6->ip6_dst; @@ -1530,7 +1533,7 @@ phyint_send(ip6, mifp, m) #endif } else { #ifdef MULTICAST_PMTUD - icmp6_error(mb_copy, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); + icmp6_error(mb_copy, ICMP6_PACKET_TOO_BIG, 0, linkmtu); #else #ifdef MRT6DEBUG if (mrt6debug & DEBUG_XMIT) diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index 1f0c0cca34b7..12dd2822c203 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -137,6 +137,8 @@ static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int, struct ip6_frag **)); static int ip6_insert_jumboopt __P((struct ip6_exthdrs *, u_int32_t)); static int ip6_splithdr __P((struct mbuf *, struct ip6_exthdrs *)); +static int ip6_getpmtu __P((struct route_in6 *, struct route_in6 *, + struct ifnet *, struct in6_addr *, u_long *)); /* @@ -771,51 +773,9 @@ skip_ipsec2:; if (ifpp) *ifpp = ifp; - /* - * Determine path MTU. - */ - if (ro_pmtu != ro) { - /* The first hop and the final destination may differ. */ - struct sockaddr_in6 *sin6_fin = - (struct sockaddr_in6 *)&ro_pmtu->ro_dst; - if (ro_pmtu->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || - !IN6_ARE_ADDR_EQUAL(&sin6_fin->sin6_addr, - &finaldst))) { - RTFREE(ro_pmtu->ro_rt); - ro_pmtu->ro_rt = (struct rtentry *)0; - } - if (ro_pmtu->ro_rt == 0) { - bzero(sin6_fin, sizeof(*sin6_fin)); - sin6_fin->sin6_family = AF_INET6; - sin6_fin->sin6_len = sizeof(struct sockaddr_in6); - sin6_fin->sin6_addr = finaldst; - - rtalloc((struct route *)ro_pmtu); - } - } - if (ro_pmtu->ro_rt != NULL) { - u_int32_t ifmtu = ND_IFINFO(ifp)->linkmtu; - - mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu; - if (mtu > ifmtu || mtu == 0) { - /* - * The MTU on the route is larger than the MTU on - * the interface! 
This shouldn't happen, unless the - * MTU of the interface has been changed after the - * interface was brought up. Change the MTU in the - * route to match the interface MTU (as long as the - * field isn't locked). - * - * if MTU on the route is 0, we need to fix the MTU. - * this case happens with path MTU discovery timeouts. - */ - mtu = ifmtu; - if ((ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0) - ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */ - } - } else { - mtu = ND_IFINFO(ifp)->linkmtu; - } + /* Determine path MTU. */ + if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu)) != 0) + goto bad; /* * advanced API (IPV6_USE_MIN_MTU) overrides mtu setting @@ -1300,6 +1260,69 @@ ip6_insertfraghdr(m0, m, hlen, frghdrp) return (0); } +static int +ip6_getpmtu(ro_pmtu, ro, ifp, dst, mtup) + struct route_in6 *ro_pmtu, *ro; + struct ifnet *ifp; + struct in6_addr *dst; + u_long *mtup; +{ + u_int32_t mtu = 0; + int error = 0; + + /* + * Determine path MTU. + */ + if (ro_pmtu != ro) { + /* The first hop and the final destination may differ. */ + struct sockaddr_in6 *sa6_dst = + (struct sockaddr_in6 *)&ro_pmtu->ro_dst; + if (ro_pmtu->ro_rt && + ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 || + !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) { + RTFREE(ro_pmtu->ro_rt); + ro_pmtu->ro_rt = (struct rtentry *)NULL; + } + if (ro_pmtu->ro_rt == NULL) { + bzero(sa6_dst, sizeof(*sa6_dst)); + sa6_dst->sin6_family = AF_INET6; + sa6_dst->sin6_len = sizeof(struct sockaddr_in6); + sa6_dst->sin6_addr = *dst; + + rtalloc((struct route *)ro_pmtu); + } + } + if (ro_pmtu->ro_rt) { + u_int32_t ifmtu; + + if (ifp == NULL) + ifp = ro_pmtu->ro_rt->rt_ifp; + ifmtu = IN6_LINKMTU(ifp); + mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu; + if (mtu == 0) + mtu = ifmtu; + else if (mtu > ifmtu) { + /* + * The MTU on the route is larger than the MTU on + * the interface! This shouldn't happen, unless the + * MTU of the interface has been changed after the + * interface was brought up.
Change the MTU in the + * route to match the interface MTU (as long as the + * field isn't locked). + */ + mtu = ifmtu; + if (!(ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU)) + ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; + } + } else if (ifp) { + mtu = IN6_LINKMTU(ifp); + } else + error = EHOSTUNREACH; /* XXX */ + + *mtup = mtu; + return (error); +} + /* * IP6 socket option processing. */ diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c index d7a0e6ff6bae..7a74cdf997b4 100644 --- a/sys/netinet6/nd6.c +++ b/sys/netinet6/nd6.c @@ -51,6 +51,7 @@ #include #include +#include #include #include #include @@ -149,12 +150,10 @@ nd6_ifattach(ifp) nd->initialized = 1; - nd->linkmtu = ifnet_byindex(ifp->if_index)->if_mtu; nd->chlim = IPV6_DEFHLIM; nd->basereachable = REACHABLE_TIME; nd->reachable = ND_COMPUTE_RTIME(nd->basereachable); nd->retrans = RETRANS_TIMER; - nd->receivedra = 0; /* * Note that the default value of ip6_accept_rtadv is 0, which means * we won't accept RAs by default even if we set ND6_IFF_ACCEPT_RTADV @@ -194,21 +193,19 @@ nd6_setmtu0(ifp, ndi) struct ifnet *ifp; struct nd_ifinfo *ndi; { - u_long oldmaxmtu; - u_long oldlinkmtu; + u_int32_t omaxmtu; - oldmaxmtu = ndi->maxmtu; - oldlinkmtu = ndi->linkmtu; + omaxmtu = ndi->maxmtu; switch (ifp->if_type) { - case IFT_ARCNET: /* XXX MTU handling needs more work */ - ndi->maxmtu = MIN(60480, ifp->if_mtu); + case IFT_ARCNET: + ndi->maxmtu = MIN(ARC_PHDS_MAXMTU, ifp->if_mtu); /* RFC2497 */ break; case IFT_ETHER: ndi->maxmtu = MIN(ETHERMTU, ifp->if_mtu); break; case IFT_FDDI: - ndi->maxmtu = MIN(FDDIIPMTU, ifp->if_mtu); + ndi->maxmtu = MIN(FDDIIPMTU, ifp->if_mtu); /* RFC2467 */ break; case IFT_ATM: ndi->maxmtu = MIN(ATMMTU, ifp->if_mtu); @@ -229,29 +226,21 @@ nd6_setmtu0(ifp, ndi) break; } - if (oldmaxmtu != ndi->maxmtu) { - /* - * If the ND level MTU is not set yet, or if the maxmtu - * is reset to a smaller value than the ND level MTU, - * also reset the ND level MTU. 
- */ - if (ndi->linkmtu == 0 || - ndi->maxmtu < ndi->linkmtu) { - ndi->linkmtu = ndi->maxmtu; - /* also adjust in6_maxmtu if necessary. */ - if (oldlinkmtu == 0) { - /* - * XXX: the case analysis is grotty, but - * it is not efficient to call in6_setmaxmtu() - * here when we are during the initialization - * procedure. - */ - if (in6_maxmtu < ndi->linkmtu) - in6_maxmtu = ndi->linkmtu; - } else - in6_setmaxmtu(); - } + /* + * Decreasing the interface MTU under IPV6 minimum MTU may cause + * undesirable situation. We thus notify the operator of the change + * explicitly. The check for omaxmtu is necessary to restrict the + * log to the case of changing the MTU, not initializing it. + */ + if (omaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) { + log(LOG_NOTICE, "nd6_setmtu0: " + "new link MTU on %s (%lu) is too small for IPv6\n", + if_name(ifp), (unsigned long)ndi->maxmtu); } + + if (ndi->maxmtu > in6_maxmtu) + in6_setmaxmtu(); /* check all interfaces just in case */ + #undef MIN } @@ -1449,7 +1438,7 @@ nd6_ioctl(cmd, data, ifp) case OSIOCGIFINFO_IN6: /* XXX: old ndp(8) assumes a positive value for linkmtu. 
*/ bzero(&ndi->ndi, sizeof(ndi->ndi)); - ndi->ndi.linkmtu = ND_IFINFO(ifp)->linkmtu; + ndi->ndi.linkmtu = IN6_LINKMTU(ifp); ndi->ndi.maxmtu = ND_IFINFO(ifp)->maxmtu; ndi->ndi.basereachable = ND_IFINFO(ifp)->basereachable; ndi->ndi.reachable = ND_IFINFO(ifp)->reachable; @@ -1457,10 +1446,10 @@ nd6_ioctl(cmd, data, ifp) ndi->ndi.flags = ND_IFINFO(ifp)->flags; ndi->ndi.recalctm = ND_IFINFO(ifp)->recalctm; ndi->ndi.chlim = ND_IFINFO(ifp)->chlim; - ndi->ndi.receivedra = ND_IFINFO(ifp)->receivedra; break; case SIOCGIFINFO_IN6: ndi->ndi = *ND_IFINFO(ifp); + ndi->ndi.linkmtu = IN6_LINKMTU(ifp); break; case SIOCSIFINFO_FLAGS: ND_IFINFO(ifp)->flags = ndi->ndi.flags; diff --git a/sys/netinet6/nd6.h b/sys/netinet6/nd6.h index 2ee4a48e5de5..576231246dc9 100644 --- a/sys/netinet6/nd6.h +++ b/sys/netinet6/nd6.h @@ -79,7 +79,6 @@ struct nd_ifinfo { u_int32_t flags; /* Flags */ int recalctm; /* BaseReacable re-calculation timer */ u_int8_t chlim; /* CurHopLimit */ - u_int8_t receivedra; u_int8_t initialized; /* Flag to see the entry is initialized */ /* the following 3 members are for privacy extension for addrconf */ u_int8_t randomseed0[8]; /* upper 64 bits of MD5 digest */ diff --git a/sys/netinet6/nd6_rtr.c b/sys/netinet6/nd6_rtr.c index ec65cb7abbed..e5e61dd7e586 100644 --- a/sys/netinet6/nd6_rtr.c +++ b/sys/netinet6/nd6_rtr.c @@ -364,38 +364,33 @@ nd6_ra_input(m, off, icmp6len) * MTU */ if (ndopts.nd_opts_mtu && ndopts.nd_opts_mtu->nd_opt_mtu_len == 1) { - u_int32_t mtu; + u_long mtu; + u_long maxmtu; - mtu = ntohl(ndopts.nd_opts_mtu->nd_opt_mtu_mtu); + mtu = (u_long)ntohl(ndopts.nd_opts_mtu->nd_opt_mtu_mtu); /* lower bound */ if (mtu < IPV6_MMTU) { nd6log((LOG_INFO, "nd6_ra_input: bogus mtu option " - "mtu=%d sent from %s, ignoring\n", + "mtu=%lu sent from %s, ignoring\n", mtu, ip6_sprintf(&ip6->ip6_src))); goto skip; } /* upper bound */ - if (ndi->maxmtu) { - if (mtu <= ndi->maxmtu) { - int change = (ndi->linkmtu != mtu); + maxmtu = (ndi->maxmtu && ndi->maxmtu < 
ifp->if_mtu) + ? ndi->maxmtu : ifp->if_mtu; + if (mtu <= maxmtu) { + int change = (ndi->linkmtu != mtu); - ndi->linkmtu = mtu; - if (change) /* in6_maxmtu may change */ - in6_setmaxmtu(); - } else { - nd6log((LOG_INFO, "nd6_ra_input: bogus mtu " - "mtu=%d sent from %s; " - "exceeds maxmtu %d, ignoring\n", - mtu, ip6_sprintf(&ip6->ip6_src), - ndi->maxmtu)); - } + ndi->linkmtu = mtu; + if (change) /* in6_maxmtu may change */ + in6_setmaxmtu(); } else { - nd6log((LOG_INFO, "nd6_ra_input: mtu option " - "mtu=%d sent from %s; maxmtu unknown, " - "ignoring\n", - mtu, ip6_sprintf(&ip6->ip6_src))); + nd6log((LOG_INFO, "nd6_ra_input: bogus mtu " + "mtu=%lu sent from %s; " + "exceeds maxmtu %lu, ignoring\n", + mtu, ip6_sprintf(&ip6->ip6_src), maxmtu)); } }