ipoib: correct setting MTU from inside ipoib(4).

It is not enough to set ifnet->if_mtu to change the interface MTU.
The system saves the MTU for a route in the radix tree, and the route
cache keeps the interface MTU as well. Since adding a multicast group
causes the MTU to be recalculated, even bringing the interface up
changes the MTU from 4042 to 1500, which makes the system configuration
inconsistent. Worse, ip_output() prefers the route MTU over the
interface MTU, so large packets are not fragmented and are dropped on
the floor.

Fix it for ipoib(4) using the same approach (or hack) as was applied
for if_tun/if_tap in r339012.  Thanks to bz@ for giving the hint.

Submitted by:   kib@
Approved by:    hselasky (mentor)
MFC after:      1 week
Sponsored by:   Mellanox Technologies
This commit is contained in:
Slava Shwartsman 2018-12-05 13:25:47 +00:00
parent e13619b68b
commit 099ad46e81
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=341535
3 changed files with 40 additions and 14 deletions

View File

@ -518,7 +518,7 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
struct ipoib_path *path);
#endif
int ipoib_change_mtu(struct ipoib_dev_priv *priv, int new_mtu);
int ipoib_change_mtu(struct ipoib_dev_priv *priv, int new_mtu, bool propagate);
int ipoib_mcast_attach(struct ipoib_dev_priv *priv, u16 mlid,
union ib_gid *mgid, int set_qkey);

View File

@ -257,10 +257,34 @@ ipoib_stop(struct ipoib_dev_priv *priv)
return 0;
}
int
ipoib_change_mtu(struct ipoib_dev_priv *priv, int new_mtu)
/*
 * Set the MTU of the ifnet behind priv to new_mtu.
 *
 * If the interface MTU already equals new_mtu, nothing is done and 0 is
 * returned.  Otherwise, when propagate is true, the change is submitted
 * as a SIOCSIFMTU request through ifhwioctl(); when propagate is false,
 * only ifp->if_mtu is written.
 *
 * Returns 0 on the direct-store path, or whatever ifhwioctl() returned on
 * the propagating path.
 */
static int
ipoib_propagate_ifnet_mtu(struct ipoib_dev_priv *priv, int new_mtu,
bool propagate)
{
/*
 * NOTE(review): dev is never referenced in this function; this line
 * looks like a removed line of the diff rather than live code -- confirm.
 */
struct ifnet *dev = priv->dev;
struct ifnet *ifp;
struct ifreq ifr;
int error;
ifp = priv->dev;
/* Nothing to change: report success without touching the interface. */
if (ifp->if_mtu == new_mtu)
return (0);
if (propagate) {
/*
 * Build an MTU-change request naming this interface and hand it to
 * ifhwioctl(), with the current vnet switched to the interface's vnet
 * for the duration of the call.  Per the commit message, writing
 * if_mtu alone is not enough because routes keep their own MTU.
 */
strlcpy(ifr.ifr_name, if_name(ifp), IFNAMSIZ);
ifr.ifr_mtu = new_mtu;
CURVNET_SET(ifp->if_vnet);
error = ifhwioctl(SIOCSIFMTU, ifp, (caddr_t)&ifr, curthread);
CURVNET_RESTORE();
} else {
/* Direct store only; no ioctl is issued on this path. */
ifp->if_mtu = new_mtu;
error = 0;
}
return (error);
}
int
ipoib_change_mtu(struct ipoib_dev_priv *priv, int new_mtu, bool propagate)
{
int error, prev_admin_mtu;
/* dev->if_mtu > 2K ==> connected mode */
if (ipoib_cm_admin_enabled(priv)) {
@ -271,20 +295,21 @@ ipoib_change_mtu(struct ipoib_dev_priv *priv, int new_mtu)
ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n",
priv->mcast_mtu);
dev->if_mtu = new_mtu;
return 0;
return (ipoib_propagate_ifnet_mtu(priv, new_mtu, propagate));
}
if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu))
return -EINVAL;
prev_admin_mtu = priv->admin_mtu;
priv->admin_mtu = new_mtu;
dev->if_mtu = min(priv->mcast_mtu, priv->admin_mtu);
queue_work(ipoib_workqueue, &priv->flush_light);
return 0;
error = ipoib_propagate_ifnet_mtu(priv, min(priv->mcast_mtu,
priv->admin_mtu), propagate);
if (error == 0)
queue_work(ipoib_workqueue, &priv->flush_light);
else
priv->admin_mtu = prev_admin_mtu;
return (error);
}
static int
@ -338,7 +363,7 @@ ipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
/*
* Set the interface MTU.
*/
error = -ipoib_change_mtu(priv, ifr->ifr_mtu);
error = -ipoib_change_mtu(priv, ifr->ifr_mtu, false);
break;
default:
error = EINVAL;

View File

@ -564,7 +564,8 @@ void ipoib_mcast_join_task(struct work_struct *work)
spin_unlock_irq(&priv->lock);
if (!ipoib_cm_admin_enabled(priv))
ipoib_change_mtu(priv, min(priv->mcast_mtu, priv->admin_mtu));
ipoib_change_mtu(priv, min(priv->mcast_mtu, priv->admin_mtu),
true);
ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");