Implement the get_net_dev_by_params() client callback in ipoib.

Also fix the validate_ipv4_net_dev() and validate_ipv6_net_dev() functions,
which had the source and destination addresses swapped and did not set the
scope ID for IPv6 link-local addresses.

This allows applications like krping to work using IPoIB devices.

MFC after:		3 days
Approved by:		re (gjb)
Sponsored by:		Mellanox Technologies
This commit is contained in:
Hans Petter Selasky 2018-09-07 18:05:09 +00:00
parent 5b029767e2
commit 86156495f4
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=338526
3 changed files with 197 additions and 39 deletions

View File

@ -1263,10 +1263,10 @@ static bool validate_ipv4_net_dev(struct net_device *net_dev,
const struct sockaddr_in *src_addr)
{
#ifdef INET
struct sockaddr_in dst_tmp = *dst_addr;
struct sockaddr_in src_tmp = *src_addr;
__be32 daddr = dst_addr->sin_addr.s_addr,
saddr = src_addr->sin_addr.s_addr;
struct net_device *src_dev;
struct net_device *dst_dev;
struct rtentry *rte;
bool ret;
@ -1276,29 +1276,29 @@ static bool validate_ipv4_net_dev(struct net_device *net_dev,
ipv4_is_loopback(saddr))
return false;
src_dev = ip_dev_find(net_dev->if_vnet, saddr);
if (src_dev != net_dev) {
if (src_dev != NULL)
dev_put(src_dev);
dst_dev = ip_dev_find(net_dev->if_vnet, daddr);
if (dst_dev != net_dev) {
if (dst_dev != NULL)
dev_put(dst_dev);
return false;
}
dev_put(src_dev);
dev_put(dst_dev);
/*
* Make sure the socket address length field
* is set, else rtalloc1() will fail.
*/
dst_tmp.sin_len = sizeof(dst_tmp);
src_tmp.sin_len = sizeof(src_tmp);
CURVNET_SET(net_dev->if_vnet);
rte = rtalloc1((struct sockaddr *)&dst_tmp, 1, 0);
CURVNET_RESTORE();
rte = rtalloc1((struct sockaddr *)&src_tmp, 1, 0);
if (rte != NULL) {
ret = (rte->rt_ifp == net_dev);
RTFREE_LOCKED(rte);
} else {
ret = false;
}
CURVNET_RESTORE();
return ret;
#else
return false;
@ -1310,31 +1310,42 @@ static bool validate_ipv6_net_dev(struct net_device *net_dev,
const struct sockaddr_in6 *src_addr)
{
#ifdef INET6
struct sockaddr_in6 dst_tmp = *dst_addr;
struct in6_addr in6_addr = src_addr->sin6_addr;
struct net_device *src_dev;
struct sockaddr_in6 src_tmp = *src_addr;
struct in6_addr in6_addr = dst_addr->sin6_addr;
struct net_device *dst_dev;
struct rtentry *rte;
bool ret;
src_dev = ip6_dev_find(net_dev->if_vnet, in6_addr);
if (src_dev != net_dev)
dst_dev = ip6_dev_find(net_dev->if_vnet, in6_addr);
if (dst_dev != net_dev) {
if (dst_dev != NULL)
dev_put(dst_dev);
return false;
}
CURVNET_SET(net_dev->if_vnet);
/*
* Make sure the socket address length field
* is set, else rtalloc1() will fail.
*/
dst_tmp.sin6_len = sizeof(dst_tmp);
src_tmp.sin6_len = sizeof(src_tmp);
CURVNET_SET(net_dev->if_vnet);
rte = rtalloc1((struct sockaddr *)&dst_tmp, 1, 0);
CURVNET_RESTORE();
/*
* Make sure the scope ID gets embedded, else rtalloc1() will
* resolve to the loopback interface.
*/
src_tmp.sin6_scope_id = net_dev->if_index;
sa6_embedscope(&src_tmp, 0);
rte = rtalloc1((struct sockaddr *)&src_tmp, 1, 0);
if (rte != NULL) {
ret = (rte->rt_ifp == net_dev);
RTFREE_LOCKED(rte);
} else {
ret = false;
}
CURVNET_RESTORE();
return ret;
#else
return false;

View File

@ -149,16 +149,6 @@ roce_gid_enum_netdev_default(struct ib_device *ib_dev,
return (hweight_long(gid_type_mask));
}
/* Driver name that is compared against a network interface's if_dname. */
#define ETH_IPOIB_DRV_NAME "ib"
/*
 * Return 1 when the driver name of "dev" is exactly
 * ETH_IPOIB_DRV_NAME, else 0.
 */
static inline int
is_eth_ipoib_intf(struct net_device *dev)
{
	return (strcmp(dev->if_dname, ETH_IPOIB_DRV_NAME) == 0 ? 1 : 0);
}
static void
roce_gid_update_addr_callback(struct ib_device *device, u8 port,
struct net_device *ndev, void *cookie)
@ -322,15 +312,15 @@ roce_gid_queue_scan_event(struct net_device *ndev)
struct roce_netdev_event_work *work;
retry:
if (is_eth_ipoib_intf(ndev))
return;
if (ndev->if_type != IFT_ETHER) {
if (ndev->if_type == IFT_L2VLAN) {
ndev = rdma_vlan_dev_real_dev(ndev);
if (ndev != NULL)
goto retry;
}
switch (ndev->if_type) {
case IFT_ETHER:
break;
case IFT_L2VLAN:
ndev = rdma_vlan_dev_real_dev(ndev);
if (ndev != NULL)
goto retry;
/* FALLTHROUGH */
default:
return;
}

View File

@ -54,6 +54,8 @@ static int ipoib_resolvemulti(struct ifnet *, struct sockaddr **,
#include <net/ip.h>
#include <net/ipv6.h>
#include <rdma/ib_cache.h>
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
MODULE_LICENSE("Dual BSD/GPL");
@ -90,6 +92,10 @@ struct ib_sa_client ipoib_sa_client;
static void ipoib_add_one(struct ib_device *device);
static void ipoib_remove_one(struct ib_device *device, void *client_data);
static struct net_device *ipoib_get_net_dev_by_params(
struct ib_device *dev, u8 port, u16 pkey,
const union ib_gid *gid, const struct sockaddr *addr,
void *client_data);
static void ipoib_start(struct ifnet *dev);
static int ipoib_output(struct ifnet *ifp, struct mbuf *m,
const struct sockaddr *dst, struct route *ro);
@ -163,7 +169,8 @@ ipoib_mtap_proto(struct ifnet *ifp, struct mbuf *mb, uint16_t proto)
static struct ib_client ipoib_client = {
.name = "ipoib",
.add = ipoib_add_one,
.remove = ipoib_remove_one
.remove = ipoib_remove_one,
.get_net_dev_by_params = ipoib_get_net_dev_by_params,
};
int
@ -1113,6 +1120,156 @@ ipoib_remove_one(struct ib_device *device, void *client_data)
kfree(dev_list);
}
/*
 * Check whether "addr" is configured on the network interface "dev".
 *
 * The interface's address list is walked with the read lock held and
 * with the current vnet switched to the one "dev" belongs to.  An
 * entry matches only when its address family and length equal those
 * of "addr" and the two socket addresses are byte-wise identical over
 * addr->sa_len bytes.
 *
 * Returns 1 when a matching address is found, else 0.
 */
static int
ipoib_match_dev_addr(const struct sockaddr *addr, struct net_device *dev)
{
	struct ifaddr *ifa;
	int found = 0;

	CURVNET_SET(dev->if_vnet);
	IF_ADDR_RLOCK(dev);
	CK_STAILQ_FOREACH(ifa, &dev->if_addrhead, ifa_link) {
		/* Skip entries without an address or of another kind. */
		if (ifa->ifa_addr != NULL &&
		    ifa->ifa_addr->sa_family == addr->sa_family &&
		    ifa->ifa_addr->sa_len == addr->sa_len &&
		    memcmp(ifa->ifa_addr, addr, addr->sa_len) == 0) {
			found = 1;
			break;
		}
	}
	IF_ADDR_RUNLOCK(dev);
	CURVNET_RESTORE();

	return (found);
}
/*
 * ipoib_match_gid_pkey_addr - count the IPoIB interfaces, starting
 * with "priv" itself and continuing through its child interfaces,
 * that match the given P_Key index, GID and address.
 *
 * A NULL "gid" matches any local GID and a NULL "addr" matches any
 * interface address.
 *
 * @found_net_dev: when a match is seen and this still points to NULL,
 * it is set to the net_device of the match (the parent's net_device
 * if the matching interface has a parent) and a reference is taken on
 * it.  It therefore holds a referenced net_device whenever the return
 * value is >= 1.
 *
 * Returns the number of matches.  Walking the child list stops as
 * soon as more than one match has been counted.
 */
static int
ipoib_match_gid_pkey_addr(struct ipoib_dev_priv *priv,
    const union ib_gid *gid, u16 pkey_index, const struct sockaddr *addr,
    struct net_device **found_net_dev)
{
	struct ipoib_dev_priv *child;
	struct net_device *ndev;
	int count = 0;

	/* Does this interface itself match? */
	if (priv->pkey_index != pkey_index)
		goto scan_children;
	if (gid != NULL && memcmp(gid, &priv->local_gid, sizeof(*gid)) != 0)
		goto scan_children;
	if (addr != NULL && ipoib_match_dev_addr(addr, priv->dev) == 0)
		goto scan_children;

	/* Only the first match is reported back to the caller. */
	if (*found_net_dev == NULL) {
		ndev = priv->dev;
		if (priv->parent != NULL)
			ndev = priv->parent;
		*found_net_dev = ndev;
		dev_hold(ndev);
	}
	count++;

scan_children:
	/* Check child interfaces */
	mutex_lock(&priv->vlan_mutex);
	list_for_each_entry(child, &priv->child_intfs, list) {
		count += ipoib_match_gid_pkey_addr(child, gid,
		    pkey_index, addr, found_net_dev);
		if (count > 1)
			break;
	}
	mutex_unlock(&priv->vlan_mutex);

	return (count);
}
/*
 * __ipoib_get_net_dev_by_params - count the net_devs on "dev_list"
 * that sit on the given port and match the P_Key index, GID and
 * address.  Counting stops once more than one match has been seen.
 *
 * @net_dev is always reset to NULL first.  When the return value is
 * >= 1 it contains a matching net_device with a reference held.
 */
static int
__ipoib_get_net_dev_by_params(struct list_head *dev_list, u8 port,
    u16 pkey_index, const union ib_gid *gid,
    const struct sockaddr *addr, struct net_device **net_dev)
{
	struct ipoib_dev_priv *priv;
	int total = 0;

	*net_dev = NULL;

	list_for_each_entry(priv, dev_list, list) {
		/* Only interfaces on the requested port are candidates. */
		if (priv->port == port) {
			total += ipoib_match_gid_pkey_addr(priv, gid,
			    pkey_index, addr, net_dev);
			if (total > 1)
				break;
		}
	}

	return (total);
}
/*
 * Look up the IPoIB network interface for an IB device, port, P_Key,
 * GID and (optionally needed) L3 address.
 *
 * "client_data" is the list of IPoIB interfaces for "dev".  The
 * lookup first uses the L2 parameters only.  If more than one
 * interface matches, it is repeated with "addr" as an additional
 * criterion.
 *
 * Returns the matching net_device with a reference held, or NULL if
 * the port does not run the IB protocol, the P_Key is not in the
 * cache, there is no interface list, or nothing matches.
 */
static struct net_device *
ipoib_get_net_dev_by_params(struct ib_device *dev, u8 port, u16 pkey,
    const union ib_gid *gid, const struct sockaddr *addr, void *client_data)
{
	struct list_head *dev_list = client_data;
	struct net_device *net_dev;
	u16 pkey_index;
	int matches;

	if (!rdma_protocol_ib(dev, port))
		return (NULL);

	if (ib_find_cached_pkey(dev, port, pkey, &pkey_index) != 0)
		return (NULL);

	if (dev_list == NULL)
		return (NULL);

	/* See if we can find a unique device matching the L2 parameters */
	matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index,
	    gid, NULL, &net_dev);
	if (matches == 0)
		return (NULL);
	if (matches == 1)
		return (net_dev);

	/* Ambiguous: drop the reference taken by the L2-only lookup. */
	dev_put(net_dev);

	/*
	 * Couldn't find a unique device with L2 parameters only. Use L3
	 * address to uniquely match the net device
	 */
	matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index,
	    gid, addr, &net_dev);
	if (matches == 0)
		return (NULL);

	/* Still not unique: warn, but hand back the first match anyway. */
	if (matches != 1) {
		dev_warn_ratelimited(&dev->dev,
		    "duplicate IP address detected\n");
	}

	return (net_dev);
}
static void
ipoib_config_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
{