Multiple fixes for using IPv6 link-local addresses with RDMA in ibcore.

1) Fail to resolve RDMA address if rtalloc1() returns the loopback
device, lo0, as the gateway interface. Currently RDMA loopback is
not supported.

2) Use ip_dev_find() and ip6_dev_find() to look up network interfaces
with matching IPv4 and IPv6 addresses, respectively.

3) In addr_resolve() make sure the "ifa" pointer is always set, also when
the "ifp" is NULL. Otherwise a NULL pointer dereference might happen when
reading from the "ifa" pointer later on.

4) In rdma_addr_find_dmac_by_grh() make sure the "bound_dev_if" field
gets set properly instead of passing the scope ID through the IPv6
socket address structure. This is more in line with upstream OFED
in Linux.

5) In rdma_addr_find_smac_by_sgid() there is no need to pass the
scope ID for IPv6. Either it is stored in the "bound_dev_if" field
or ip6_dev_find() will find the correct network device regardless
of the scope ID.

Sponsored by:	Mellanox Technologies
MFC after:	1 week
This commit is contained in:
Hans Petter Selasky 2017-11-09 19:22:43 +00:00
parent da1bfa506f
commit 860bbba0bb
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=325614
5 changed files with 103 additions and 164 deletions

View File

@ -110,14 +110,6 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev,
}
EXPORT_SYMBOL(rdma_copy_addr);
#define SCOPE_ID_CACHE(_scope_id, _addr6) do { \
(_addr6)->sin6_addr.s6_addr[3] = (_scope_id); \
(_addr6)->sin6_scope_id = 0; } while (0)
#define SCOPE_ID_RESTORE(_scope_id, _addr6) do { \
(_addr6)->sin6_scope_id = (_scope_id); \
(_addr6)->sin6_addr.s6_addr[3] = 0; } while (0)
int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
u16 *vlan_id)
{
@ -149,34 +141,17 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
#if defined(INET6)
case AF_INET6:
{
struct sockaddr_in6 *sin6;
struct ifaddr *ifa;
in_port_t port;
uint32_t scope_id;
dev = ip6_dev_find(&init_net,
((const struct sockaddr_in6 *)addr)->sin6_addr);
sin6 = (struct sockaddr_in6 *)addr;
port = sin6->sin6_port;
sin6->sin6_port = 0;
scope_id = sin6->sin6_scope_id;
if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
SCOPE_ID_CACHE(scope_id, sin6);
CURVNET_SET_QUIET(&init_net);
ifa = ifa_ifwithaddr(addr);
CURVNET_RESTORE();
sin6->sin6_port = port;
if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
SCOPE_ID_RESTORE(scope_id, sin6);
if (ifa == NULL) {
ret = -ENODEV;
break;
}
ret = rdma_copy_addr(dev_addr, ifa->ifa_ifp, NULL);
if (vlan_id)
*vlan_id = rdma_vlan_dev_vlan_id(ifa->ifa_ifp);
ifa_free(ifa);
break;
}
if (!dev)
return ret;
ret = rdma_copy_addr(dev_addr, dev, NULL);
if (vlan_id)
*vlan_id = rdma_vlan_dev_vlan_id(dev);
dev_put(dev);
break;
#endif
default:
break;
@ -222,11 +197,8 @@ static int addr_resolve(struct sockaddr *src_in,
struct ifaddr *ifa;
struct ifnet *ifp;
struct rtentry *rte;
#if defined(INET) || defined(INET6)
in_port_t port;
#endif
#ifdef INET6
uint32_t scope_id;
#if defined(INET6)
struct sockaddr_in6 dstv6_tmp;
#endif
u_char edst[MAX_ADDR_LEN];
int multi;
@ -247,11 +219,7 @@ static int addr_resolve(struct sockaddr *src_in,
ifp = NULL;
rte = NULL;
ifa = NULL;
ifp = NULL;
memset(edst, 0, sizeof(edst));
#ifdef INET6
scope_id = -1U;
#endif
switch (dst_in->sa_family) {
#ifdef INET
@ -263,29 +231,11 @@ static int addr_resolve(struct sockaddr *src_in,
multi = 1;
sin = (struct sockaddr_in *)src_in;
if (sin->sin_addr.s_addr != INADDR_ANY) {
/*
* Address comparison fails if the port is set
* cache it here to be restored later.
*/
port = sin->sin_port;
sin->sin_port = 0;
memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
/*
* If we have a source address to use look it
* up first and verify that it is a local
* interface:
*/
CURVNET_SET_QUIET(&init_net);
ifa = ifa_ifwithaddr(src_in);
CURVNET_RESTORE();
sin->sin_port = port;
if (ifa == NULL) {
ifp = ip_dev_find(&init_net, sin->sin_addr.s_addr);
if (ifp == NULL) {
error = ENETUNREACH;
goto done;
}
ifp = ifa->ifa_ifp;
ifa_free(ifa);
if (bcast || multi)
goto mcast;
}
@ -293,42 +243,26 @@ static int addr_resolve(struct sockaddr *src_in,
#endif
#ifdef INET6
case AF_INET6:
/* Make destination socket address writeable */
dstv6_tmp = *(struct sockaddr_in6 *)dst_in;
dst_in = (struct sockaddr *)&dstv6_tmp;
sin6 = (struct sockaddr_in6 *)dst_in;
if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
multi = 1;
if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) {
/*
* The IB address comparison fails if the
* scope ID is set and not part of the addr:
*/
scope_id = sin6->sin6_scope_id;
if (scope_id < 256)
SCOPE_ID_CACHE(scope_id, sin6);
}
/*
* Make sure the scope ID gets embedded, else rtalloc1() will
* resolve to the loopback interface.
*/
sin6->sin6_scope_id = addr->bound_dev_if;
sa6_embedscope(sin6, 0);
sin6 = (struct sockaddr_in6 *)src_in;
if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
port = sin6->sin6_port;
sin6->sin6_port = 0;
if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) {
if (scope_id < 256)
SCOPE_ID_CACHE(scope_id, sin6);
}
/*
* If we have a source address to use look it
* up first and verify that it is a local
* interface:
*/
CURVNET_SET_QUIET(&init_net);
ifa = ifa_ifwithaddr(src_in);
CURVNET_RESTORE();
sin6->sin6_port = port;
if (ifa == NULL) {
ifp = ip6_dev_find(&init_net, sin6->sin6_addr);
if (ifp == NULL) {
error = ENETUNREACH;
goto done;
}
ifp = ifa->ifa_ifp;
ifa_free(ifa);
if (bcast || multi)
goto mcast;
}
@ -342,9 +276,13 @@ static int addr_resolve(struct sockaddr *src_in,
* Make sure the route exists and has a valid link.
*/
rte = rtalloc1(dst_in, 1, 0);
if (rte == NULL || rte->rt_ifp == NULL || !RT_LINK_IS_UP(rte->rt_ifp)) {
if (rte)
if (rte == NULL || rte->rt_ifp == NULL ||
RT_LINK_IS_UP(rte->rt_ifp) == 0 ||
rte->rt_ifp == V_loif) {
if (rte != NULL) {
RTFREE_LOCKED(rte);
rte = NULL;
}
error = EHOSTUNREACH;
goto done;
}
@ -356,20 +294,27 @@ static int addr_resolve(struct sockaddr *src_in,
* correct interface pointer and unlock the route.
*/
if (multi || bcast) {
/* rt_ifa holds the route answer source address */
ifa = rte->rt_ifa;
if (ifp == NULL) {
ifp = rte->rt_ifp;
/* rt_ifa holds the route answer source address */
ifa = rte->rt_ifa;
dev_hold(ifp);
}
RTFREE_LOCKED(rte);
} else if (ifp && ifp != rte->rt_ifp) {
rte = NULL;
} else if (ifp != NULL && ifp != rte->rt_ifp) {
RTFREE_LOCKED(rte);
rte = NULL;
error = ENETUNREACH;
goto done;
} else {
/* rt_ifa holds the route answer source address */
ifa = rte->rt_ifa;
if (ifp == NULL) {
ifp = rte->rt_ifp;
ifa = rte->rt_ifa;
dev_hold(ifp);
}
RT_UNLOCK(rte);
}
@ -418,23 +363,17 @@ static int addr_resolve(struct sockaddr *src_in,
error = EINVAL;
break;
}
RTFREE(rte);
done:
if (error == 0)
error = -rdma_copy_addr(addr, ifp, edst);
if (error == 0)
memcpy(src_in, ifa->ifa_addr, ip_addr_size(ifa->ifa_addr));
#ifdef INET6
if (scope_id < 256) {
sin6 = (struct sockaddr_in6 *)src_in;
if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
SCOPE_ID_RESTORE(scope_id, sin6);
sin6 = (struct sockaddr_in6 *)dst_in;
SCOPE_ID_RESTORE(scope_id, sin6);
}
#endif
if (error == EWOULDBLOCK)
error = ENODATA;
if (rte != NULL)
RTFREE(rte);
if (ifp != NULL)
dev_put(ifp);
CURVNET_RESTORE();
return -error;
@ -567,7 +506,7 @@ static void resolve_cb(int status, struct sockaddr *src_addr,
}
int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac,
u16 *vlan_id, u32 scope_id)
u16 *vlan_id, int *if_index)
{
int ret = 0;
struct rdma_dev_addr dev_addr;
@ -580,16 +519,17 @@ int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac,
struct sockaddr_in6 _sockaddr_in6;
} sgid_addr, dgid_addr;
ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid, scope_id);
ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid);
if (ret)
return ret;
ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid, scope_id);
ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid);
if (ret)
return ret;
memset(&dev_addr, 0, sizeof(dev_addr));
if (if_index)
dev_addr.bound_dev_if = *if_index;
ctx.addr = &dev_addr;
init_completion(&ctx.comp);
@ -611,23 +551,7 @@ int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac,
}
EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
u32 rdma_get_ipv6_scope_id(struct ib_device *ib, u8 port_num)
{
#ifdef INET6
struct ifnet *ifp;
if (ib->get_netdev == NULL)
return (-1U);
ifp = ib->get_netdev(ib, port_num);
if (ifp == NULL)
return (-1U);
return (in6_getscopezone(ifp, IPV6_ADDR_SCOPE_LINKLOCAL));
#else
return (-1U);
#endif
}
int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id,
u32 scope_id)
int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
{
int ret = 0;
struct rdma_dev_addr dev_addr;
@ -637,7 +561,7 @@ int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id,
struct sockaddr_in6 _sockaddr_in6;
} gid_addr;
ret = rdma_gid2ip(&gid_addr._sockaddr, sgid, scope_id);
ret = rdma_gid2ip(&gid_addr._sockaddr, sgid);
if (ret)
return ret;
memset(&dev_addr, 0, sizeof(dev_addr));

View File

@ -51,6 +51,9 @@
#include <net/tcp.h>
#include <net/ipv6.h>
#include <netinet6/scope6_var.h>
#include <netinet6/ip6_var.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
#include <rdma/ib_cache.h>
@ -710,11 +713,7 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
== RDMA_TRANSPORT_IB &&
rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)
== IB_LINK_LAYER_ETHERNET) {
u32 scope_id = rdma_get_ipv6_scope_id(id_priv->id.device,
id_priv->id.port_num);
ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL,
scope_id);
ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL);
if (ret)
goto out;
}
@ -1452,19 +1451,16 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
goto err3;
if (is_iboe && !is_sidr) {
u32 scope_id = rdma_get_ipv6_scope_id(cm_id->device,
ib_event->param.req_rcvd.port);
if (ib_event->param.req_rcvd.primary_path != NULL)
rdma_addr_find_smac_by_sgid(
&ib_event->param.req_rcvd.primary_path->sgid,
psmac, NULL, scope_id);
psmac, NULL);
else
psmac = NULL;
if (ib_event->param.req_rcvd.alternate_path != NULL)
rdma_addr_find_smac_by_sgid(
&ib_event->param.req_rcvd.alternate_path->sgid,
palt_smac, NULL, scope_id);
palt_smac, NULL);
else
palt_smac = NULL;
}
@ -2311,8 +2307,12 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
src_addr->sa_family = dst_addr->sa_family;
#ifdef INET6
if (dst_addr->sa_family == AF_INET6) {
((struct sockaddr_in6 *) src_addr)->sin6_scope_id =
((struct sockaddr_in6 *) dst_addr)->sin6_scope_id;
struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr;
struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr;
src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
if (IN6_IS_SCOPE_LINKLOCAL(&dst_addr6->sin6_addr) ||
IN6_IS_ADDR_MC_INTFACELOCAL(&dst_addr6->sin6_addr))
id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id;
}
#endif
}
@ -2666,20 +2666,23 @@ static int cma_get_port(struct rdma_id_private *id_priv)
static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
struct sockaddr *addr)
{
#if defined(INET6)
struct sockaddr_in6 *sin6;
#ifdef INET6
struct sockaddr_in6 sin6;
if (addr->sa_family != AF_INET6)
return 0;
sin6 = (struct sockaddr_in6 *) addr;
if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) &&
!sin6->sin6_scope_id)
return -EINVAL;
sin6 = *(struct sockaddr_in6 *)addr;
dev_addr->bound_dev_if = sin6->sin6_scope_id;
if (IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr) ||
IN6_IS_ADDR_MC_INTFACELOCAL(&sin6.sin6_addr)) {
/* check if IPv6 scope ID is set */
if (sa6_recoverscope(&sin6) || sin6.sin6_scope_id == 0)
return -EINVAL;
dev_addr->bound_dev_if = sin6.sin6_scope_id;
}
#endif
return 0;
return (0);
}
int rdma_listen(struct rdma_cm_id *id, int backlog)

View File

@ -2094,13 +2094,23 @@ static ssize_t __uverbs_modify_qp(struct ib_uverbs_file *file,
attr->smac);
attr->vlan_id = rdma_get_vlan_id(&sgid);
} else {
struct net_device *idev;
int if_index;
if (qp->device->get_netdev != NULL &&
(idev = qp->device->get_netdev(qp->device, port_num)) != NULL)
if_index = idev->if_index;
else
if_index = 0;
ret = rdma_addr_find_dmac_by_grh(&sgid, dgid,
attr->ah_attr.dmac,
&attr->vlan_id, -1U);
&attr->vlan_id,
&if_index);
if (ret)
goto out;
ret = rdma_addr_find_smac_by_sgid(&sgid, attr->smac,
NULL, -1U);
NULL);
if (ret)
goto out;
}

View File

@ -41,6 +41,7 @@
#include <linux/module.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
@ -207,10 +208,18 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
ah_attr->vlan_id = wc->vlan_id;
} else {
u32 scope_id = rdma_get_ipv6_scope_id(device, port_num);
struct net_device *idev;
int if_index;
if (device->get_netdev != NULL &&
(idev = device->get_netdev(device, port_num)) != NULL)
if_index = idev->if_index;
else
if_index = 0;
ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid,
ah_attr->dmac, &ah_attr->vlan_id,
scope_id);
&if_index);
if (ret)
return ret;
}

View File

@ -105,10 +105,9 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr);
int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
const unsigned char *dst_dev_addr);
int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id,
u32 scope_id);
int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id);
int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *smac,
u16 *vlan_id, u32 scope_id);
u16 *vlan_id, int *if_index);
static inline int ip_addr_size(struct sockaddr *addr)
{
@ -171,8 +170,7 @@ static inline int rdma_ip2gid(struct sockaddr *addr, union ib_gid *gid)
}
/* Important - sockaddr should be a union of sockaddr_in and sockaddr_in6 */
static inline int rdma_gid2ip(struct sockaddr *out, union ib_gid *gid,
uint32_t scope_id)
static inline int rdma_gid2ip(struct sockaddr *out, union ib_gid *gid)
{
if (ipv6_addr_v4mapped((struct in6_addr *)gid)) {
struct sockaddr_in *out_in = (struct sockaddr_in *)out;
@ -186,15 +184,10 @@ static inline int rdma_gid2ip(struct sockaddr *out, union ib_gid *gid,
out_in->sin6_len = sizeof(*out_in);
out_in->sin6_family = AF_INET6;
memcpy(&out_in->sin6_addr.s6_addr, gid->raw, 16);
if (scope_id < 256 &&
IN6_IS_SCOPE_LINKLOCAL(&out_in->sin6_addr))
out_in->sin6_scope_id = scope_id;
}
return 0;
}
u32 rdma_get_ipv6_scope_id(struct ib_device *ib, u8 port_num);
/* This func is called only in loopback ip address (127.0.0.1)
* case in which sgid is not relevant
*/