Get correct network device when accepting incoming RDMA connections in ibcore.

This patch ensures that the GID index, rather than the GID value itself,
is always used as the basis for resolving incoming RDMA connections.

Background:
On a per infiniband port basis, the GID is not a unique identifier!
Any assumption of uniqueness falls apart when the VLAN ID, IPv6 scope ID
and RoCE type, as supported by RoCE v2, are taken into account. This
additional information is stored in the so-called GID attributes and is
needed to correctly identify the destination network interface for an
incoming connection.

Different VLANs are allowed to define the same IPv4 addresses. This is
especially true for the default IPv6 link-local addresses, and when using
so-called containers or jails.

The VNET information for the destination network interface is needed in
order to perform the L2 address lookup in the right Virtual Network Stack
context.

Consequently, old functions previously used by RoCE v1, like
rdma_addr_find_smac_by_sgid(), are impossible to support, because
there can be multiple identical GIDs associated with the same
infiniband port, and the answer to such a request becomes undefined.
This function has been removed.

MFC after:	1 week
Sponsored by:	Mellanox Technologies
This commit is contained in:
hselasky 2018-03-05 14:24:30 +00:00
parent 22d19d1493
commit 1a0dfe6582
4 changed files with 31 additions and 94 deletions

View File

@ -151,8 +151,7 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
EXPORT_SYMBOL(rdma_copy_addr);
int rdma_translate_ip(const struct sockaddr *addr,
struct rdma_dev_addr *dev_addr,
u16 *vlan_id)
struct rdma_dev_addr *dev_addr)
{
struct net_device *dev = NULL;
int ret = -EADDRNOTAVAIL;
@ -185,8 +184,6 @@ int rdma_translate_ip(const struct sockaddr *addr,
if (dev != NULL) {
ret = rdma_copy_addr(dev_addr, dev, NULL);
if (vlan_id)
*vlan_id = rdma_vlan_dev_vlan_id(dev);
dev_put(dev);
}
return ret;
@ -305,6 +302,8 @@ static int addr4_resolve(struct sockaddr_in *src_in,
if (rte->rt_ifp->if_flags & IFF_LOOPBACK) {
ifp = rte->rt_ifp;
dev_hold(ifp);
} else if (addr->bound_dev_if != 0) {
ifp = dev_get_by_index(addr->net, addr->bound_dev_if);
} else {
ifp = ip_dev_find(addr->net, src_in->sin_addr.s_addr);
}
@ -460,6 +459,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
if (rte->rt_ifp->if_flags & IFF_LOOPBACK) {
ifp = rte->rt_ifp;
dev_hold(ifp);
} else if (addr->bound_dev_if != 0) {
ifp = dev_get_by_index(addr->net, addr->bound_dev_if);
} else {
ifp = ip6_dev_find(addr->net, src_in->sin6_addr);
}
@ -551,7 +552,7 @@ static int addr_resolve_neigh(struct ifnet *dev,
if (dev->if_flags & IFF_LOOPBACK) {
int ret;
ret = rdma_translate_ip(dst_in, addr, NULL);
ret = rdma_translate_ip(dst_in, addr);
if (!ret)
memcpy(addr->dst_dev_addr, addr->src_dev_addr,
MAX_ADDR_LEN);
@ -757,13 +758,12 @@ static void resolve_cb(int status, struct sockaddr *src_addr,
int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
const union ib_gid *dgid,
u8 *dmac, u16 *vlan_id, int *if_index,
u8 *dmac, struct net_device *dev,
int *hoplimit)
{
int ret = 0;
struct rdma_dev_addr dev_addr;
struct resolve_cb_context ctx;
struct net_device *dev;
union {
struct sockaddr _sockaddr;
@ -771,14 +771,13 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
struct sockaddr_in6 _sockaddr_in6;
} sgid_addr, dgid_addr;
rdma_gid2ip(&sgid_addr._sockaddr, sgid);
rdma_gid2ip(&dgid_addr._sockaddr, dgid);
memset(&dev_addr, 0, sizeof(dev_addr));
if (if_index)
dev_addr.bound_dev_if = *if_index;
dev_addr.net = TD_TO_VNET(curthread);
dev_addr.bound_dev_if = dev->if_index;
dev_addr.net = dev_net(dev);
ctx.addr = &dev_addr;
init_completion(&ctx.comp);
@ -794,43 +793,12 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
return ret;
memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
dev = dev_get_by_index(dev_addr.net, dev_addr.bound_dev_if);
if (!dev)
return -ENODEV;
if (if_index)
*if_index = dev_addr.bound_dev_if;
if (vlan_id)
*vlan_id = rdma_vlan_dev_vlan_id(dev);
if (hoplimit)
*hoplimit = dev_addr.hoplimit;
dev_put(dev);
return ret;
}
EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh);
/*
 * rdma_addr_find_smac_by_sgid - look up the source MAC address for a GID.
 *
 * @sgid:    GID to look up; it is converted to a socket address with
 *           rdma_gid2ip().
 * @smac:    output buffer; on success ETH_ALEN bytes are copied into it
 *           from the resolved device address.
 * @vlan_id: passed through unchanged to rdma_translate_ip().
 *
 * Returns 0 on success, otherwise the non-zero value returned by
 * rdma_translate_ip(), in which case @smac is left untouched.
 *
 * NOTE(review): the lookup is keyed on the GID value only, within the
 * VNET of the current thread. The commit message accompanying this diff
 * states that several identical GIDs may be associated with one port,
 * which makes the answer undefined, and that this function is removed.
 */
int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
{
int ret = 0;
struct rdma_dev_addr dev_addr;
/* Storage large enough for either an IPv4 or an IPv6 socket address. */
union {
struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
struct sockaddr_in6 _sockaddr_in6;
} gid_addr;
rdma_gid2ip(&gid_addr._sockaddr, sgid);
/* Start from a zeroed device address; only the net field is set. */
memset(&dev_addr, 0, sizeof(dev_addr));
dev_addr.net = TD_TO_VNET(curthread);
ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
if (ret)
return ret;
/* Hand the source hardware address back to the caller. */
memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
return ret;
}
EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
int addr_init(void)
{
addr_wq = alloc_workqueue("ib_addr", WQ_MEM_RECLAIM, 0);

View File

@ -533,7 +533,7 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
int ret;
if (addr->sa_family != AF_IB) {
ret = rdma_translate_ip(addr, dev_addr, NULL);
ret = rdma_translate_ip(addr, dev_addr);
} else {
cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
ret = 0;
@ -2094,7 +2094,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
conn_id->state = RDMA_CM_CONNECT;
ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr, NULL);
ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr);
if (ret) {
mutex_unlock(&conn_id->handler_mutex);
rdma_destroy_id(new_cm_id);

View File

@ -483,56 +483,29 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
return ret;
if (rdma_protocol_roce(device, port_num)) {
int if_index;
u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
struct ib_gid_attr dgid_attr;
const u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
wc->vlan_id : 0xffff;
struct net_device *idev;
struct net_device *resolved_dev;
if (!(wc->wc_flags & IB_WC_GRH))
return -EPROTOTYPE;
if (!device->get_netdev)
return -EOPNOTSUPP;
idev = device->get_netdev(device, port_num);
if (!idev)
return -ENODEV;
/*
* Get network interface index early on. This is
* useful for IPv6 link local addresses:
*/
if_index = idev->if_index;
ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid,
ah_attr->dmac,
wc->wc_flags & IB_WC_WITH_VLAN ?
NULL : &vlan_id,
&if_index, &hoplimit);
if (ret) {
dev_put(idev);
return ret;
}
resolved_dev = dev_get_by_index(&init_net, if_index);
if (resolved_dev->if_flags & IFF_LOOPBACK) {
dev_put(resolved_dev);
resolved_dev = idev;
dev_hold(resolved_dev);
}
rcu_read_lock();
if (resolved_dev != idev && !rdma_is_upper_dev_rcu(idev,
resolved_dev))
ret = -EHOSTUNREACH;
rcu_read_unlock();
dev_put(idev);
dev_put(resolved_dev);
ret = get_sgid_index_from_eth(device, port_num, vlan_id,
&dgid, gid_type, &gid_index);
if (ret)
return ret;
ret = get_sgid_index_from_eth(device, port_num, vlan_id,
&dgid, gid_type, &gid_index);
ret = ib_get_cached_gid(device, port_num, gid_index, &dgid, &dgid_attr);
if (ret)
return ret;
if (dgid_attr.ndev == NULL)
return -ENODEV;
ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid, ah_attr->dmac,
dgid_attr.ndev, &hoplimit);
dev_put(dgid_attr.ndev);
if (ret)
return ret;
}
@ -1207,7 +1180,6 @@ int ib_resolve_eth_dmac(struct ib_qp *qp,
} else {
union ib_gid sgid;
struct ib_gid_attr sgid_attr;
int ifindex;
int hop_limit;
ret = ib_query_gid(qp->device,
@ -1221,12 +1193,10 @@ int ib_resolve_eth_dmac(struct ib_qp *qp,
goto out;
}
ifindex = sgid_attr.ndev->if_index;
ret = rdma_addr_find_l2_eth_by_grh(&sgid,
&qp_attr->ah_attr.grh.dgid,
qp_attr->ah_attr.dmac,
NULL, &ifindex, &hop_limit);
sgid_attr.ndev, &hop_limit);
dev_put(sgid_attr.ndev);

View File

@ -95,10 +95,10 @@ struct rdma_dev_addr {
* rdma_translate_ip - Translate a local IP address to an RDMA hardware
* address.
*
* The dev_addr->net field must be initialized.
* The dev_addr->net and dev_addr->bound_dev_if fields must be initialized.
*/
int rdma_translate_ip(const struct sockaddr *addr,
struct rdma_dev_addr *dev_addr, u16 *vlan_id);
struct rdma_dev_addr *dev_addr);
/**
* rdma_resolve_ip - Resolve source and destination IP addresses to
@ -134,10 +134,9 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
int rdma_addr_size(struct sockaddr *addr);
int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id);
int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
const union ib_gid *dgid,
u8 *smac, u16 *vlan_id, int *if_index,
u8 *smac, struct net_device *dev,
int *hoplimit);
static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr)