Fix for RDMA loopback over VLAN in ibcore.

Implement a more generic solution for detecting loopback.
The problem was that the default netdevice was resolved
for loopback even when a VLAN was used. Use the real network
device instead of the loopback device for the bound device
interface.

How to test:
ucmatose -b 127.0.0.1 -p 20090
ucmatose -s 5.6.5.1 -p 20090

Note that RDMA treats the IPv4 and IPv6 loopback
addresses like any other address.

MFC after:		1 week
Sponsored by:		Mellanox Technologies
This commit is contained in:
Hans Petter Selasky 2018-07-17 09:02:29 +00:00
parent f9899e4567
commit fed17c5852
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=336368
3 changed files with 96 additions and 85 deletions

View File

@ -124,7 +124,7 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
const unsigned char *dst_dev_addr)
{
/* check for loopback device */
if (dev->if_type == IFT_LOOP) {
if (dev->if_flags & IFF_LOOPBACK) {
dev_addr->dev_type = ARPHRD_ETHER;
memset(dev_addr->src_dev_addr, 0, MAX_ADDR_LEN);
memset(dev_addr->broadcast, 0, MAX_ADDR_LEN);
@ -153,19 +153,12 @@ EXPORT_SYMBOL(rdma_copy_addr);
int rdma_translate_ip(const struct sockaddr *addr,
struct rdma_dev_addr *dev_addr)
{
struct net_device *dev = NULL;
int ret = -EADDRNOTAVAIL;
struct net_device *dev;
int ret;
if (dev_addr->bound_dev_if) {
dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
if (!dev)
return -ENODEV;
ret = rdma_copy_addr(dev_addr, dev, NULL);
dev_put(dev);
return ret;
}
switch (addr->sa_family) {
} else switch (addr->sa_family) {
#ifdef INET
case AF_INET:
dev = ip_dev_find(dev_addr->net,
@ -179,12 +172,19 @@ int rdma_translate_ip(const struct sockaddr *addr,
break;
#endif
default:
dev = NULL;
break;
}
if (dev != NULL) {
ret = rdma_copy_addr(dev_addr, dev, NULL);
/* disallow connections through 127.0.0.1 itself */
if (dev->if_flags & IFF_LOOPBACK)
ret = -EINVAL;
else
ret = rdma_copy_addr(dev_addr, dev, NULL);
dev_put(dev);
} else {
ret = -ENODEV;
}
return ret;
}
@ -305,20 +305,39 @@ static int addr4_resolve(struct sockaddr_in *src_in,
/* Step 2 - find outgoing network interface */
switch (type) {
case ADDR_VALID:
/* check for loopback device */
if (rte->rt_ifp->if_flags & IFF_LOOPBACK) {
ifp = rte->rt_ifp;
dev_hold(ifp);
} else if (addr->bound_dev_if != 0) {
/* get source interface */
if (addr->bound_dev_if != 0) {
ifp = dev_get_by_index(addr->net, addr->bound_dev_if);
} else {
ifp = ip_dev_find(addr->net, src_in->sin_addr.s_addr);
}
/* check source interface */
if (ifp == NULL) {
error = ENETUNREACH;
goto error_rt_free;
} else if (ifp->if_flags & IFF_LOOPBACK) {
/*
* Source address cannot be a loopback device.
*/
error = EHOSTUNREACH;
goto error_put_ifp;
} else if (rte->rt_ifp->if_flags & IFF_LOOPBACK) {
if (memcmp(&src_in->sin_addr, &dst_in->sin_addr,
sizeof(src_in->sin_addr))) {
/*
* Destination is loopback, but source
* and destination address is not the
* same.
*/
error = EHOSTUNREACH;
goto error_put_ifp;
}
} else if (ifp != rte->rt_ifp) {
/*
* Source and destination interfaces are
* different.
*/
error = ENETUNREACH;
goto error_put_ifp;
}
@ -481,20 +500,39 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
/* Step 2 - find outgoing network interface */
switch (type) {
case ADDR_VALID:
/* check for loopback device */
if (rte->rt_ifp->if_flags & IFF_LOOPBACK) {
ifp = rte->rt_ifp;
dev_hold(ifp);
} else if (addr->bound_dev_if != 0) {
/* get source interface */
if (addr->bound_dev_if != 0) {
ifp = dev_get_by_index(addr->net, addr->bound_dev_if);
} else {
ifp = ip6_dev_find(addr->net, src_in->sin6_addr);
}
/* check source interface */
if (ifp == NULL) {
error = ENETUNREACH;
goto error_rt_free;
} else if (ifp->if_flags & IFF_LOOPBACK) {
/*
* Source address cannot be a loopback device.
*/
error = EHOSTUNREACH;
goto error_put_ifp;
} else if (rte->rt_ifp->if_flags & IFF_LOOPBACK) {
if (memcmp(&src_in->sin6_addr, &dst_in->sin6_addr,
sizeof(src_in->sin6_addr))) {
/*
* Destination is loopback, but source
* and destination address is not the
* same.
*/
error = EHOSTUNREACH;
goto error_put_ifp;
}
} else if (ifp != rte->rt_ifp) {
/*
* Source and destination interfaces are
* different.
*/
error = ENETUNREACH;
goto error_put_ifp;
}
@ -586,11 +624,14 @@ static int addr_resolve_neigh(struct ifnet *dev,
if (dev->if_flags & IFF_LOOPBACK) {
int ret;
/* find real device, not loopback one */
addr->bound_dev_if = 0;
ret = rdma_translate_ip(dst_in, addr);
if (!ret)
if (ret == 0) {
memcpy(addr->dst_dev_addr, addr->src_dev_addr,
MAX_ADDR_LEN);
}
return ret;
}
@ -603,8 +644,7 @@ static int addr_resolve_neigh(struct ifnet *dev,
static int addr_resolve(struct sockaddr *src_in,
const struct sockaddr *dst_in,
struct rdma_dev_addr *addr,
bool resolve_neigh)
struct rdma_dev_addr *addr)
{
struct net_device *ndev = NULL;
u8 edst[MAX_ADDR_LEN];
@ -613,27 +653,30 @@ static int addr_resolve(struct sockaddr *src_in,
if (dst_in->sa_family != src_in->sa_family)
return -EINVAL;
if (src_in->sa_family == AF_INET) {
switch (src_in->sa_family) {
case AF_INET:
ret = addr4_resolve((struct sockaddr_in *)src_in,
(const struct sockaddr_in *)dst_in,
addr, edst, &ndev);
if (ret)
return ret;
if (resolve_neigh)
ret = addr_resolve_neigh(ndev, dst_in, edst, addr);
} else {
break;
case AF_INET6:
ret = addr6_resolve((struct sockaddr_in6 *)src_in,
(const struct sockaddr_in6 *)dst_in, addr,
edst, &ndev);
if (ret)
return ret;
if (resolve_neigh)
ret = addr_resolve_neigh(ndev, dst_in, edst, addr);
break;
default:
ret = -EADDRNOTAVAIL;
break;
}
addr->bound_dev_if = ndev->if_index;
/* check for error */
if (ret != 0)
return ret;
/* store MAC addresses and check for loopback */
ret = addr_resolve_neigh(ndev, dst_in, edst, addr);
/* set belonging VNET, if any */
addr->net = dev_net(ndev);
dev_put(ndev);
@ -653,8 +696,7 @@ static void process_req(struct work_struct *work)
if (req->status == -ENODATA) {
src_in = (struct sockaddr *) &req->src_addr;
dst_in = (struct sockaddr *) &req->dst_addr;
req->status = addr_resolve(src_in, dst_in, req->addr,
true);
req->status = addr_resolve(src_in, dst_in, req->addr);
if (req->status && time_after_eq(jiffies, req->timeout))
req->status = -ETIMEDOUT;
else if (req->status == -ENODATA)
@ -714,7 +756,7 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
req->client = client;
atomic_inc(&client->refcount);
req->status = addr_resolve(src_in, dst_in, addr, true);
req->status = addr_resolve(src_in, dst_in, addr);
switch (req->status) {
case 0:
req->timeout = jiffies;
@ -752,7 +794,7 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr,
src_in->sa_family = dst_addr->sa_family;
}
return addr_resolve(src_in, dst_addr, addr, false);
return addr_resolve(src_in, dst_addr, addr);
}
EXPORT_SYMBOL(rdma_resolve_ip_route);

View File

@ -568,12 +568,12 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
static inline int cma_validate_port(struct ib_device *device, u8 port,
enum ib_gid_type gid_type,
union ib_gid *gid, int dev_type,
struct vnet *net,
int bound_if_index)
union ib_gid *gid,
const struct rdma_dev_addr *dev_addr)
{
const int dev_type = dev_addr->dev_type;
struct net_device *ndev;
int ret = -ENODEV;
struct net_device *ndev = NULL;
if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
return ret;
@ -582,19 +582,9 @@ static inline int cma_validate_port(struct ib_device *device, u8 port,
return ret;
if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
ndev = dev_get_by_index(net, bound_if_index);
if (ndev && ndev->if_flags & IFF_LOOPBACK) {
pr_info("detected loopback device\n");
dev_put(ndev);
if (!device->get_netdev)
return -EOPNOTSUPP;
ndev = device->get_netdev(device, port);
if (!ndev)
return -ENODEV;
}
ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
} else {
ndev = NULL;
gid_type = IB_GID_TYPE_IB;
}
@ -636,10 +626,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
ret = cma_validate_port(cma_dev->device, port,
rdma_protocol_ib(cma_dev->device, port) ?
IB_GID_TYPE_IB :
listen_id_priv->gid_type, gidp,
dev_addr->dev_type,
dev_addr->net,
dev_addr->bound_dev_if);
listen_id_priv->gid_type, gidp, dev_addr);
if (!ret) {
id_priv->id.port_num = port;
goto out;
@ -660,9 +647,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
rdma_protocol_ib(cma_dev->device, port) ?
IB_GID_TYPE_IB :
cma_dev->default_gid_type[port - 1],
gidp, dev_addr->dev_type,
dev_addr->net,
dev_addr->bound_dev_if);
gidp, dev_addr);
if (!ret) {
id_priv->id.port_num = port;
goto out;
@ -2523,21 +2508,6 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
goto err2;
}
if (ndev->if_flags & IFF_LOOPBACK) {
dev_put(ndev);
if (!id_priv->id.device->get_netdev) {
ret = -EOPNOTSUPP;
goto err2;
}
ndev = id_priv->id.device->get_netdev(id_priv->id.device,
id_priv->id.port_num);
if (!ndev) {
ret = -ENODEV;
goto err2;
}
}
route->path_rec->net = ndev->if_vnet;
route->path_rec->ifindex = ndev->if_index;
supported_gids = roce_gid_type_mask_support(id_priv->id.device,

View File

@ -696,10 +696,9 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
resolved_dev = dev_get_by_index(dev_addr.net,
dev_addr.bound_dev_if);
if (resolved_dev->if_flags & IFF_LOOPBACK) {
dev_put(resolved_dev);
resolved_dev = idev;
dev_hold(resolved_dev);
if (!resolved_dev) {
dev_put(idev);
return -ENODEV;
}
ndev = ib_get_ndev_from_path(rec);
rcu_read_lock();