From 9d40cf60d6875924171eafe6364d4edc69f8d4c2 Mon Sep 17 00:00:00 2001 From: Hans Petter Selasky Date: Thu, 22 Oct 2020 09:09:53 +0000 Subject: [PATCH] Factor out generic IP over infiniband, IPoIB, definitions and code into net/if_infiniband.c and net/infiniband.h . No functional change intended. Differential Revision: https://reviews.freebsd.org/D26254 Reviewed by: melifaro@ MFC after: 1 week Sponsored by: Mellanox Technologies // NVIDIA Networking --- sys/conf/files | 1 + sys/modules/Makefile | 1 + sys/modules/if_infiniband/Makefile | 10 + sys/net/if_infiniband.c | 538 ++++++++++++++++++ sys/net/infiniband.h | 80 +++ sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h | 13 +- .../drivers/infiniband/ulp/ipoib/ipoib_cm.c | 28 +- .../drivers/infiniband/ulp/ipoib/ipoib_ib.c | 10 +- .../drivers/infiniband/ulp/ipoib/ipoib_main.c | 357 +----------- 9 files changed, 663 insertions(+), 375 deletions(-) create mode 100644 sys/modules/if_infiniband/Makefile create mode 100644 sys/net/if_infiniband.c create mode 100644 sys/net/infiniband.h diff --git a/sys/conf/files b/sys/conf/files index f4a296546f1e..c938631d8af8 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -4571,6 +4571,7 @@ compat/lindebugfs/lindebugfs.c optional lindebugfs \ compile-with "${LINUXKPI_C}" # OpenFabrics Enterprise Distribution (Infiniband) +net/if_infiniband.c optional ofed ofed/drivers/infiniband/core/ib_addr.c optional ofed \ compile-with "${OFED_C}" ofed/drivers/infiniband/core/ib_agent.c optional ofed \ diff --git a/sys/modules/Makefile b/sys/modules/Makefile index 4aa82965c9b4..b0feddbad301 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -154,6 +154,7 @@ SUBDIR= \ ${_if_gif} \ ${_if_gre} \ ${_if_me} \ + if_infiniband \ if_lagg \ ${_if_ndis} \ ${_if_stf} \ diff --git a/sys/modules/if_infiniband/Makefile b/sys/modules/if_infiniband/Makefile new file mode 100644 index 000000000000..ca5e7e3f44db --- /dev/null +++ b/sys/modules/if_infiniband/Makefile @@ -0,0 +1,10 @@ +# $FreeBSD$ + +.PATH: ${SRCTOP}/sys/net + +KMOD= if_infiniband +SRCS= if_infiniband.c \ + opt_inet.h \ + opt_inet6.h + +.include diff --git a/sys/net/if_infiniband.c b/sys/net/if_infiniband.c new file mode 100644 index 000000000000..7800aa54ab35 --- /dev/null +++ b/sys/net/if_infiniband.c @@ -0,0 +1,538 @@ +/*- + * Copyright (c) 2020 Mellanox Technologies. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "opt_inet.h" +#include "opt_inet6.h" + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include + +#ifdef INET +static inline void +infiniband_ipv4_multicast_map(uint32_t addr, + const uint8_t *broadcast, uint8_t *buf) +{ + uint8_t scope; + + addr = ntohl(addr); + scope = broadcast[5] & 0xF; + + buf[0] = 0; + buf[1] = 0xff; + buf[2] = 0xff; + buf[3] = 0xff; + buf[4] = 0xff; + buf[5] = 0x10 | scope; + buf[6] = 0x40; + buf[7] = 0x1b; + buf[8] = broadcast[8]; + buf[9] = broadcast[9]; + buf[10] = 0; + buf[11] = 0; + buf[12] = 0; + buf[13] = 0; + buf[14] = 0; + buf[15] = 0; + buf[16] = (addr >> 24) & 0xff; + buf[17] = (addr >> 16) & 0xff; + buf[18] = (addr >> 8) & 0xff; + buf[19] = addr & 0xff; +} +#endif + +#ifdef INET6 +static inline void +infiniband_ipv6_multicast_map(const struct in6_addr *addr, + const uint8_t *broadcast, uint8_t *buf) +{ + uint8_t scope; + + scope = broadcast[5] & 0xF; + + buf[0] = 0; + buf[1] = 0xff; + buf[2] = 0xff; + buf[3] = 0xff; + buf[4] = 0xff; + buf[5] = 0x10 | scope; + buf[6] = 0x60; + buf[7] = 0x1b; + buf[8] = broadcast[8]; + buf[9] = broadcast[9]; + memcpy(&buf[10], &addr->s6_addr[6], 10); +} +#endif + +/* + * This is for clients that have an infiniband_header in the mbuf. + */ +void +infiniband_bpf_mtap(struct ifnet *ifp, struct mbuf *mb) +{ + struct infiniband_header *ibh; + struct ether_header eh; + + if (mb->m_len < sizeof(*ibh)) + return; + + ibh = mtod(mb, struct infiniband_header *); + eh.ether_type = ibh->ib_protocol; + memset(eh.ether_shost, 0, ETHER_ADDR_LEN); + memcpy(eh.ether_dhost, ibh->ib_hwaddr + 4, ETHER_ADDR_LEN); + mb->m_data += sizeof(*ibh); + mb->m_len -= sizeof(*ibh); + mb->m_pkthdr.len -= sizeof(*ibh); + bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb); + mb->m_data -= sizeof(*ibh); + mb->m_len += sizeof(*ibh); + mb->m_pkthdr.len += sizeof(*ibh); +} + +/* + * Infiniband output routine. + */ +static int +infiniband_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, + struct route *ro) +{ + uint8_t edst[INFINIBAND_ADDR_LEN]; +#if defined(INET) || defined(INET6) + struct llentry *lle = NULL; +#endif + struct infiniband_header *ibh; + int error = 0; + uint16_t type; + bool is_gw; + + NET_EPOCH_ASSERT(); + + is_gw = ((ro != NULL) && (ro->ro_flags & RT_HAS_GW) != 0); + +#ifdef MAC + error = mac_ifnet_check_transmit(ifp, m); + if (error) + goto bad; +#endif + + M_PROFILE(m); + if (ifp->if_flags & IFF_MONITOR) { + error = ENETDOWN; + goto bad; + } + if (!((ifp->if_flags & IFF_UP) && + (ifp->if_drv_flags & IFF_DRV_RUNNING))) { + error = ENETDOWN; + goto bad; + } + + switch (dst->sa_family) { + case AF_LINK: + goto output; +#ifdef INET + case AF_INET: + if (lle != NULL && (lle->la_flags & LLE_VALID)) { + memcpy(edst, lle->ll_addr, sizeof(edst)); + } else if (m->m_flags & M_MCAST) { + infiniband_ipv4_multicast_map( + ((const struct sockaddr_in *)dst)->sin_addr.s_addr, + ifp->if_broadcastaddr, edst); + } else { + error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL); + if (error) { + if (error == EWOULDBLOCK) + error = 0; + m = NULL; /* mbuf is consumed by resolver */ + goto bad; + } + } + type = htons(ETHERTYPE_IP); + break; + case AF_ARP: { + struct arphdr *ah; + + if (m->m_len < sizeof(*ah)) { + error = EINVAL; + goto bad; + } + + ah = mtod(m, struct arphdr *); + + if (m->m_len < arphdr_len(ah)) { + error = EINVAL; + goto bad; + } + ah->ar_hrd = htons(ARPHRD_INFINIBAND); + + switch (ntohs(ah->ar_op)) { + case ARPOP_REVREQUEST: + case ARPOP_REVREPLY: + type = htons(ETHERTYPE_REVARP); + break; + case ARPOP_REQUEST: + case ARPOP_REPLY: + default: + type = htons(ETHERTYPE_ARP); + break; + } + + if (m->m_flags & M_BCAST) { + memcpy(edst, ifp->if_broadcastaddr, INFINIBAND_ADDR_LEN); + } else { + if (ah->ar_hln != INFINIBAND_ADDR_LEN) { + error = EINVAL; + goto bad; + } + memcpy(edst, ar_tha(ah), INFINIBAND_ADDR_LEN); + } + break; + } +#endif +#ifdef INET6 + case AF_INET6: { + const struct ip6_hdr *ip6; + + ip6 = mtod(m, const struct ip6_hdr *); + if (m->m_len < sizeof(*ip6)) { + error = EINVAL; + goto bad; + } else if (lle != NULL && (lle->la_flags & LLE_VALID)) { + memcpy(edst, lle->ll_addr, sizeof(edst)); + } else if (m->m_flags & M_MCAST) { + infiniband_ipv6_multicast_map( + &((const struct sockaddr_in6 *)dst)->sin6_addr, + ifp->if_broadcastaddr, edst); + } else if (ip6->ip6_nxt == IPPROTO_ICMPV6) { + memcpy(edst, ifp->if_broadcastaddr, INFINIBAND_ADDR_LEN); + } else { + error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL); + if (error) { + if (error == EWOULDBLOCK) + error = 0; + m = NULL; /* mbuf is consumed by resolver */ + goto bad; + } + } + type = htons(ETHERTYPE_IPV6); + break; + } +#endif + default: + error = EAFNOSUPPORT; + goto bad; + } + + /* + * Add local net header. If no space in first mbuf, + * allocate another. + */ + M_PREPEND(m, INFINIBAND_HDR_LEN, M_NOWAIT); + if (m == NULL) { + error = ENOBUFS; + goto bad; + } + ibh = mtod(m, struct infiniband_header *); + + ibh->ib_protocol = type; + memcpy(ibh->ib_hwaddr, edst, sizeof(edst)); + + /* + * Queue message on interface, update output statistics if + * successful, and start output if interface not yet active. + */ +output: + return (ifp->if_transmit(ifp, m)); +bad: + if (m != NULL) + m_freem(m); + return (error); +} + +/* + * Process a received Infiniband packet. + */ +static void +infiniband_input(struct ifnet *ifp, struct mbuf *m) +{ + struct infiniband_header *ibh; + struct epoch_tracker et; + int isr; + + CURVNET_SET_QUIET(ifp->if_vnet); + + if ((ifp->if_flags & IFF_UP) == 0) { + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); + m_freem(m); + goto done; + } + + ibh = mtod(m, struct infiniband_header *); + + /* + * Reset layer specific mbuf flags to avoid confusing upper + * layers: + */ + m->m_flags &= ~M_VLANTAG; + m_clrprotoflags(m); + + if (INFINIBAND_IS_MULTICAST(ibh->ib_hwaddr)) { + if (memcmp(ibh->ib_hwaddr, ifp->if_broadcastaddr, + ifp->if_addrlen) == 0) + m->m_flags |= M_BCAST; + else + m->m_flags |= M_MCAST; + if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); + } + + /* Let BPF have it before we strip the header. */ + INFINIBAND_BPF_MTAP(ifp, m); + + /* Allow monitor mode to claim this frame, after stats are updated. */ + if (ifp->if_flags & IFF_MONITOR) { + m_freem(m); + goto done; + } + + /* Direct packet to correct FIB based on interface config. */ + M_SETFIB(m, ifp->if_fib); + + /* + * Dispatch frame to upper layer. + */ + switch (ibh->ib_protocol) { +#ifdef INET + case htons(ETHERTYPE_IP): + isr = NETISR_IP; + break; + + case htons(ETHERTYPE_ARP): + if (ifp->if_flags & IFF_NOARP) { + /* Discard packet if ARP is disabled on interface */ + m_freem(m); + goto done; + } + isr = NETISR_ARP; + break; +#endif +#ifdef INET6 + case htons(ETHERTYPE_IPV6): + isr = NETISR_IPV6; + break; +#endif + default: + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); + m_freem(m); + goto done; + } + + /* Strip off the Infiniband header. */ + m_adj(m, INFINIBAND_HDR_LEN); + +#ifdef MAC + /* + * Tag the mbuf with an appropriate MAC label before any other + * consumers can get to it. + */ + mac_ifnet_create_mbuf(ifp, m); +#endif + /* Allow monitor mode to claim this frame, after stats are updated. */ + NET_EPOCH_ENTER(et); + netisr_dispatch(isr, m); + NET_EPOCH_EXIT(et); +done: + CURVNET_RESTORE(); +} + +static int +infiniband_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, + struct sockaddr *sa) +{ + struct sockaddr_dl *sdl; +#ifdef INET + struct sockaddr_in *sin; +#endif +#ifdef INET6 + struct sockaddr_in6 *sin6; +#endif + uint8_t *e_addr; + + switch (sa->sa_family) { + case AF_LINK: + /* + * No mapping needed. Just check that it's a valid MC address. + */ + sdl = (struct sockaddr_dl *)sa; + e_addr = LLADDR(sdl); + if (!INFINIBAND_IS_MULTICAST(e_addr)) + return (EADDRNOTAVAIL); + *llsa = NULL; + return 0; + +#ifdef INET + case AF_INET: + sin = (struct sockaddr_in *)sa; + if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) + return (EADDRNOTAVAIL); + sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND); + sdl->sdl_alen = INFINIBAND_ADDR_LEN; + e_addr = LLADDR(sdl); + infiniband_ipv4_multicast_map(sin->sin_addr.s_addr, ifp->if_broadcastaddr, + e_addr); + *llsa = (struct sockaddr *)sdl; + return (0); +#endif +#ifdef INET6 + case AF_INET6: + sin6 = (struct sockaddr_in6 *)sa; + /* + * An IP6 address of 0 means listen to all of the + * multicast address used for IP6. This has no meaning + * in infiniband. + */ + if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) + return (EADDRNOTAVAIL); + if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) + return (EADDRNOTAVAIL); + sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND); + sdl->sdl_alen = INFINIBAND_ADDR_LEN; + e_addr = LLADDR(sdl); + infiniband_ipv6_multicast_map(&sin6->sin6_addr, ifp->if_broadcastaddr, e_addr); + *llsa = (struct sockaddr *)sdl; + return (0); +#endif + default: + return (EAFNOSUPPORT); + } +} + +void +infiniband_ifattach(struct ifnet *ifp, const uint8_t *lla, const uint8_t *llb) +{ + struct sockaddr_dl *sdl; + struct ifaddr *ifa; + int i; + + ifp->if_addrlen = INFINIBAND_ADDR_LEN; + ifp->if_hdrlen = INFINIBAND_HDR_LEN; + ifp->if_mtu = INFINIBAND_MTU; + if_attach(ifp); + ifp->if_output = infiniband_output; + ifp->if_input = infiniband_input; + ifp->if_resolvemulti = infiniband_resolvemulti; + + if (ifp->if_baudrate == 0) + ifp->if_baudrate = IF_Gbps(10); /* default value */ + if (llb != NULL) + ifp->if_broadcastaddr = llb; + + ifa = ifp->if_addr; + KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__)); + sdl = (struct sockaddr_dl *)ifa->ifa_addr; + sdl->sdl_type = IFT_INFINIBAND; + sdl->sdl_alen = ifp->if_addrlen; + + if (lla != NULL) { + memcpy(LLADDR(sdl), lla, ifp->if_addrlen); + + if (ifp->if_hw_addr != NULL) + memcpy(ifp->if_hw_addr, lla, ifp->if_addrlen); + } else { + lla = LLADDR(sdl); + } + + /* Attach ethernet compatible network device */ + bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN); + + /* Announce Infiniband MAC address if non-zero. */ + for (i = 0; i < ifp->if_addrlen; i++) + if (lla[i] != 0) + break; + if (i != ifp->if_addrlen) + if_printf(ifp, "Infiniband address: %20D\n", lla, ":"); + + /* Add necessary bits are setup; announce it now. */ + EVENTHANDLER_INVOKE(infiniband_ifattach_event, ifp); + + if (IS_DEFAULT_VNET(curvnet)) + devctl_notify("INFINIBAND", ifp->if_xname, "IFATTACH", NULL); +} + +/* + * Perform common duties while detaching an Infiniband interface + */ +void +infiniband_ifdetach(struct ifnet *ifp) +{ + bpfdetach(ifp); + if_detach(ifp); +} + +static int +infiniband_modevent(module_t mod, int type, void *data) +{ + switch (type) { + case MOD_LOAD: + case MOD_UNLOAD: + return (0); + default: + return (EOPNOTSUPP); + } +} + +static moduledata_t infiniband_mod = { + .name = "if_infiniband", + .evhand = &infiniband_modevent, +}; + +DECLARE_MODULE(if_infiniband, infiniband_mod, SI_SUB_INIT_IF, SI_ORDER_ANY); +MODULE_VERSION(if_infiniband, 1); diff --git a/sys/net/infiniband.h b/sys/net/infiniband.h new file mode 100644 index 000000000000..b9fead61c220 --- /dev/null +++ b/sys/net/infiniband.h @@ -0,0 +1,80 @@ +/*- + * Copyright (c) 2020 Mellanox Technologies. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef __INFINIBAND_H__ +#define __INFINIBAND_H__ + +#include +#include + +#define INFINIBAND_ADDR_LEN 20 /* bytes */ +#define INFINIBAND_MTU 1500 /* bytes - default value */ + +#define INFINIBAND_ENC_LEN 4 /* bytes */ +#define INFINIBAND_HDR_LEN \ + (INFINIBAND_ADDR_LEN + INFINIBAND_ENC_LEN) + +#define INFINIBAND_IS_MULTICAST(addr) \ + ((addr)[4] == 0xff) + +#define INFINIBAND_BPF_MTAP(_ifp, _m) \ +do { \ + if (bpf_peers_present((_ifp)->if_bpf)) { \ + M_ASSERTVALID(_m); \ + infiniband_bpf_mtap(_ifp, _m); \ + } \ +} while (0) + +struct infiniband_header { + uint8_t ib_hwaddr[INFINIBAND_ADDR_LEN]; + uint16_t ib_protocol; /* big endian */ + uint16_t ib_reserved; /* zero */ +} __packed; + +struct infiniband_address { + uint8_t octet[INFINIBAND_ADDR_LEN]; +} __packed; + +#ifdef _KERNEL + +#include + +struct ifnet; +struct mbuf; + +extern void infiniband_ifattach(struct ifnet *, const uint8_t *hwaddr, const uint8_t *bcaddr); +extern void infiniband_ifdetach(struct ifnet *); +extern void infiniband_bpf_mtap(struct ifnet *, struct mbuf *); + +/* new infiniband interface attached event */ +typedef void (*infiniband_ifattach_event_handler_t)(void *, struct ifnet *); + +EVENTHANDLER_DECLARE(infiniband_ifattach_event, infiniband_ifattach_event_handler_t); + +#endif + +#endif /* __INFINIBAND_H__ */ diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h index 8f27a0dca03c..8a2b3333abb5 100644 --- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h @@ -438,16 +438,7 @@ struct ipoib_path { extern struct workqueue_struct *ipoib_workqueue; -#define IPOIB_MTAP_PROTO(_ifp, _m, _proto) \ -do { \ - if (bpf_peers_present((_ifp)->if_bpf)) { \ - M_ASSERTVALID(_m); \ - ipoib_mtap_proto((_ifp), (_m), (_proto)); \ - } \ -} while (0) - /* functions */ -void ipoib_mtap_proto(struct ifnet *ifp, struct mbuf *mb, uint16_t proto); void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr); void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr); @@ -463,8 +454,6 @@ int ipoib_open(struct ipoib_dev_priv *priv); int ipoib_add_pkey_attr(struct ipoib_dev_priv *priv); int ipoib_add_umcast_attr(struct ipoib_dev_priv *priv); -void ipoib_demux(struct ifnet *ifp, struct mbuf *m, u_short proto); - void ipoib_send(struct ipoib_dev_priv *priv, struct mbuf *mb, struct ipoib_ah *address, u32 qpn); void ipoib_reap_ah(struct work_struct *work); @@ -540,7 +529,7 @@ int ipoib_poll_tx(struct ipoib_dev_priv *priv, bool do_start); void ipoib_dma_unmap_rx(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req); void ipoib_dma_mb(struct ipoib_dev_priv *priv, struct mbuf *mb, unsigned int length); -struct mbuf *ipoib_alloc_map_mb(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req, int size); +struct mbuf *ipoib_alloc_map_mb(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req, int align, int size); void ipoib_set_ethtool_ops(struct ifnet *dev); diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 38d5886bb802..50c9a4f305e9 100644 --- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -153,7 +153,7 @@ static struct mbuf * ipoib_cm_alloc_rx_mb(struct ipoib_dev_priv *priv, struct ipoib_cm_rx_buf *rx_req) { return ipoib_alloc_map_mb(priv, (struct ipoib_rx_buf *)rx_req, - priv->cm.max_cm_mtu); + sizeof(struct ipoib_pseudoheader), priv->cm.max_cm_mtu); } static void ipoib_cm_free_rx_ring(struct ipoib_dev_priv *priv, @@ -484,9 +484,6 @@ void ipoib_cm_handle_rx_wc(struct ipoib_dev_priv *priv, struct ib_wc *wc) struct mbuf *mb, *newmb; struct ipoib_cm_rx *p; int has_srq; - u_short proto; - - CURVNET_SET_QUIET(dev->if_vnet); ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n", wr_id, wc->status); @@ -561,16 +558,24 @@ void ipoib_cm_handle_rx_wc(struct ipoib_dev_priv *priv, struct ib_wc *wc) ipoib_dma_mb(priv, mb, wc->byte_len); - if_inc_counter(dev, IFCOUNTER_IPACKETS, 1); - if_inc_counter(dev, IFCOUNTER_IBYTES, mb->m_pkthdr.len); - mb->m_pkthdr.rcvif = dev; - proto = *mtod(mb, uint16_t *); - m_adj(mb, IPOIB_ENCAP_LEN); - IPOIB_MTAP_PROTO(dev, mb, proto); - ipoib_demux(dev, mb, ntohs(proto)); + M_PREPEND(mb, sizeof(struct ipoib_pseudoheader), M_NOWAIT); + if (likely(mb != NULL)) { + struct ipoib_header *ibh; + if_inc_counter(dev, IFCOUNTER_IPACKETS, 1); + if_inc_counter(dev, IFCOUNTER_IBYTES, mb->m_pkthdr.len); + + /* fixup destination infiniband address */ + ibh = mtod(mb, struct ipoib_header *); + memset(ibh->hwaddr, 0, 4); + memcpy(ibh->hwaddr + 4, priv->local_gid.raw, sizeof(union ib_gid)); + + dev->if_input(dev, mb); + } else { + if_inc_counter(dev, IFCOUNTER_IERRORS, 1); + } repost: if (has_srq) { if (unlikely(ipoib_cm_post_receive_srq(priv, wr_id))) @@ -587,7 +592,6 @@ void ipoib_cm_handle_rx_wc(struct ipoib_dev_priv *priv, struct ib_wc *wc) } } done: - CURVNET_RESTORE(); return; } diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 9cb2e9b1f8a6..cb52e3db75a4 100644 --- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -112,17 +112,19 @@ ipoib_dma_mb(struct ipoib_dev_priv *priv, struct mbuf *mb, unsigned int length) struct mbuf * ipoib_alloc_map_mb(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req, - int size) + int align, int size) { struct mbuf *mb, *m; int i, j; rx_req->mb = NULL; - mb = m_getm2(NULL, size, M_NOWAIT, MT_DATA, M_PKTHDR); + mb = m_getm2(NULL, align + size, M_NOWAIT, MT_DATA, M_PKTHDR); if (mb == NULL) return (NULL); for (i = 0, m = mb; m != NULL; m = m->m_next, i++) { - m->m_len = M_SIZE(m); + m->m_len = M_SIZE(m) - align; + m->m_data += align; + align = 0; mb->m_pkthdr.len += m->m_len; rx_req->mapping[i] = ib_dma_map_single(priv->ca, mtod(m, void *), m->m_len, DMA_FROM_DEVICE); @@ -174,7 +176,7 @@ ipoib_alloc_rx_mb(struct ipoib_dev_priv *priv, int id) { return ipoib_alloc_map_mb(priv, &priv->rx_ring[id], - priv->max_ib_mtu + IB_GRH_BYTES); + 0, priv->max_ib_mtu + IB_GRH_BYTES); } static int ipoib_ib_post_receives(struct ipoib_dev_priv *priv) diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c index 09db0d78f24d..27f4da93ccba 100644 --- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -40,20 +40,15 @@ __FBSDID("$FreeBSD$"); #include "ipoib.h" #include -static int ipoib_resolvemulti(struct ifnet *, struct sockaddr **, - struct sockaddr *); - - #include #include #include #include -#include /* For ARPHRD_xxx */ #include -#include -#include + +#include #include @@ -98,18 +93,7 @@ static struct net_device *ipoib_get_net_dev_by_params( const union ib_gid *gid, const struct sockaddr *addr, void *client_data); static void ipoib_start(struct ifnet *dev); -static int ipoib_output(struct ifnet *ifp, struct mbuf *m, - const struct sockaddr *dst, struct route *ro); static int ipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data); -static void ipoib_input(struct ifnet *ifp, struct mbuf *m); - -#define IPOIB_MTAP(_ifp, _m) \ -do { \ - if (bpf_peers_present((_ifp)->if_bpf)) { \ - M_ASSERTVALID(_m); \ - ipoib_mtap_mb((_ifp), (_m)); \ - } \ -} while (0) static struct unrhdr *ipoib_unrhdr; @@ -136,37 +120,6 @@ ipoib_unrhdr_uninit(void *arg) } SYSUNINIT(ipoib_unrhdr_uninit, SI_SUB_KLD - 1, SI_ORDER_ANY, ipoib_unrhdr_uninit, NULL); -/* - * This is for clients that have an ipoib_header in the mbuf. - */ -static void -ipoib_mtap_mb(struct ifnet *ifp, struct mbuf *mb) -{ - struct ipoib_header *ih; - struct ether_header eh; - - ih = mtod(mb, struct ipoib_header *); - eh.ether_type = ih->proto; - bcopy(ih->hwaddr, &eh.ether_dhost, ETHER_ADDR_LEN); - bzero(&eh.ether_shost, ETHER_ADDR_LEN); - mb->m_data += sizeof(struct ipoib_header); - mb->m_len -= sizeof(struct ipoib_header); - bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb); - mb->m_data -= sizeof(struct ipoib_header); - mb->m_len += sizeof(struct ipoib_header); -} - -void -ipoib_mtap_proto(struct ifnet *ifp, struct mbuf *mb, uint16_t proto) -{ - struct ether_header eh; - - eh.ether_type = proto; - bzero(&eh.ether_shost, ETHER_ADDR_LEN); - bzero(&eh.ether_dhost, ETHER_ADDR_LEN); - bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb); -} - static struct ib_client ipoib_client = { .name = "ipoib", .add = ipoib_add_one, @@ -787,7 +740,7 @@ ipoib_start_locked(struct ifnet *dev, struct ipoib_dev_priv *priv) IFQ_DRV_DEQUEUE(&dev->if_snd, mb); if (mb == NULL) break; - IPOIB_MTAP(dev, mb); + INFINIBAND_BPF_MTAP(dev, mb); ipoib_send_one(priv, mb); } } @@ -875,8 +828,7 @@ ipoib_detach(struct ipoib_dev_priv *priv) dev = priv->dev; if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { priv->gone = 1; - bpfdetach(dev); - if_detach(dev); + infiniband_ifdetach(dev); if_free(dev); free_unr(ipoib_unrhdr, priv->unit); } else @@ -935,7 +887,6 @@ struct ipoib_dev_priv * ipoib_intf_alloc(const char *name) { struct ipoib_dev_priv *priv; - struct sockaddr_dl *sdl; struct ifnet *dev; priv = ipoib_priv_alloc(); @@ -953,24 +904,17 @@ ipoib_intf_alloc(const char *name) } if_initname(dev, name, priv->unit); dev->if_flags = IFF_BROADCAST | IFF_MULTICAST; - dev->if_addrlen = INFINIBAND_ALEN; - dev->if_hdrlen = IPOIB_HEADER_LEN; - if_attach(dev); + + infiniband_ifattach(dev, NULL, priv->broadcastaddr); + dev->if_init = ipoib_init; dev->if_ioctl = ipoib_ioctl; dev->if_start = ipoib_start; - dev->if_output = ipoib_output; - dev->if_input = ipoib_input; - dev->if_resolvemulti = ipoib_resolvemulti; - dev->if_baudrate = IF_Gbps(10); - dev->if_broadcastaddr = priv->broadcastaddr; + dev->if_snd.ifq_maxlen = ipoib_sendq_size * 2; - sdl = (struct sockaddr_dl *)dev->if_addr->ifa_addr; - sdl->sdl_type = IFT_INFINIBAND; - sdl->sdl_alen = dev->if_addrlen; + priv->dev = dev; if_link_state_change(dev, LINK_STATE_DOWN); - bpfattach(dev, DLT_EN10MB, ETHER_HDR_LEN); return dev->if_softc; } @@ -1165,7 +1109,6 @@ ipoib_match_dev_addr(const struct sockaddr *addr, struct net_device *dev) struct ifaddr *ifa; int retval = 0; - CURVNET_SET(dev->if_vnet); NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &dev->if_addrhead, ifa_link) { if (ifa->ifa_addr == NULL || @@ -1179,7 +1122,6 @@ ipoib_match_dev_addr(const struct sockaddr *addr, struct net_device *dev) } } NET_EPOCH_EXIT(et); - CURVNET_RESTORE(); return (retval); } @@ -1475,286 +1417,6 @@ ipoib_cleanup_module(void) ib_sa_unregister_client(&ipoib_sa_client); destroy_workqueue(ipoib_workqueue); } - -/* - * Infiniband output routine. - */ -static int -ipoib_output(struct ifnet *ifp, struct mbuf *m, - const struct sockaddr *dst, struct route *ro) -{ - u_char edst[INFINIBAND_ALEN]; -#if defined(INET) || defined(INET6) - struct llentry *lle = NULL; -#endif - struct ipoib_header *eh; - int error = 0, is_gw = 0; - short type; - - NET_EPOCH_ASSERT(); - - if (ro != NULL) - is_gw = (ro->ro_flags & RT_HAS_GW) != 0; -#ifdef MAC - error = mac_ifnet_check_transmit(ifp, m); - if (error) - goto bad; -#endif - - M_PROFILE(m); - if (ifp->if_flags & IFF_MONITOR) { - error = ENETDOWN; - goto bad; - } - if (!((ifp->if_flags & IFF_UP) && - (ifp->if_drv_flags & IFF_DRV_RUNNING))) { - error = ENETDOWN; - goto bad; - } - - switch (dst->sa_family) { -#ifdef INET - case AF_INET: - if (lle != NULL && (lle->la_flags & LLE_VALID)) - memcpy(edst, lle->ll_addr, sizeof(edst)); - else if (m->m_flags & M_MCAST) - ip_ib_mc_map(((struct sockaddr_in *)dst)->sin_addr.s_addr, ifp->if_broadcastaddr, edst); - else - error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL); - if (error) - return (error == EWOULDBLOCK ? 0 : error); - type = htons(ETHERTYPE_IP); - break; - case AF_ARP: - { - struct arphdr *ah; - ah = mtod(m, struct arphdr *); - ah->ar_hrd = htons(ARPHRD_INFINIBAND); - - switch(ntohs(ah->ar_op)) { - case ARPOP_REVREQUEST: - case ARPOP_REVREPLY: - type = htons(ETHERTYPE_REVARP); - break; - case ARPOP_REQUEST: - case ARPOP_REPLY: - default: - type = htons(ETHERTYPE_ARP); - break; - } - - if (m->m_flags & M_BCAST) - bcopy(ifp->if_broadcastaddr, edst, INFINIBAND_ALEN); - else - bcopy(ar_tha(ah), edst, INFINIBAND_ALEN); - - } - break; -#endif -#ifdef INET6 - case AF_INET6: - if (lle != NULL && (lle->la_flags & LLE_VALID)) - memcpy(edst, lle->ll_addr, sizeof(edst)); - else if (m->m_flags & M_MCAST) - ipv6_ib_mc_map(&((struct sockaddr_in6 *)dst)->sin6_addr, ifp->if_broadcastaddr, edst); - else - error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL); - if (error) - return error; - type = htons(ETHERTYPE_IPV6); - break; -#endif - - default: - if_printf(ifp, "can't handle af%d\n", dst->sa_family); - error = EAFNOSUPPORT; - goto bad; - } - - /* - * Add local net header. If no space in first mbuf, - * allocate another. - */ - M_PREPEND(m, IPOIB_HEADER_LEN, M_NOWAIT); - if (m == NULL) { - error = ENOBUFS; - goto bad; - } - eh = mtod(m, struct ipoib_header *); - (void)memcpy(&eh->proto, &type, sizeof(eh->proto)); - (void)memcpy(&eh->hwaddr, edst, sizeof (edst)); - - /* - * Queue message on interface, update output statistics if - * successful, and start output if interface not yet active. - */ - return ((ifp->if_transmit)(ifp, m)); -bad: - if (m != NULL) - m_freem(m); - return (error); -} - -/* - * Upper layer processing for a received Infiniband packet. - */ -void -ipoib_demux(struct ifnet *ifp, struct mbuf *m, u_short proto) -{ - struct epoch_tracker et; - int isr; - -#ifdef MAC - /* - * Tag the mbuf with an appropriate MAC label before any other - * consumers can get to it. - */ - mac_ifnet_create_mbuf(ifp, m); -#endif - /* Allow monitor mode to claim this frame, after stats are updated. */ - if (ifp->if_flags & IFF_MONITOR) { - if_printf(ifp, "discard frame at IFF_MONITOR\n"); - m_freem(m); - return; - } - /* Direct packet to correct FIB based on interface config */ - M_SETFIB(m, ifp->if_fib); - /* - * Dispatch frame to upper layer. - */ - switch (proto) { -#ifdef INET - case ETHERTYPE_IP: - isr = NETISR_IP; - break; - - case ETHERTYPE_ARP: - if (ifp->if_flags & IFF_NOARP) { - /* Discard packet if ARP is disabled on interface */ - m_freem(m); - return; - } - isr = NETISR_ARP; - break; -#endif -#ifdef INET6 - case ETHERTYPE_IPV6: - isr = NETISR_IPV6; - break; -#endif - default: - goto discard; - } - NET_EPOCH_ENTER(et); - netisr_dispatch(isr, m); - NET_EPOCH_EXIT(et); - return; - -discard: - m_freem(m); -} - -/* - * Process a received Infiniband packet. - */ -static void -ipoib_input(struct ifnet *ifp, struct mbuf *m) -{ - struct ipoib_header *eh; - - if ((ifp->if_flags & IFF_UP) == 0) { - m_freem(m); - return; - } - CURVNET_SET_QUIET(ifp->if_vnet); - - /* Let BPF have it before we strip the header. */ - IPOIB_MTAP(ifp, m); - eh = mtod(m, struct ipoib_header *); - /* - * Reset layer specific mbuf flags to avoid confusing upper layers. - * Strip off Infiniband header. - */ - m->m_flags &= ~M_VLANTAG; - m_clrprotoflags(m); - m_adj(m, IPOIB_HEADER_LEN); - - if (IPOIB_IS_MULTICAST(eh->hwaddr)) { - if (memcmp(eh->hwaddr, ifp->if_broadcastaddr, - ifp->if_addrlen) == 0) - m->m_flags |= M_BCAST; - else - m->m_flags |= M_MCAST; - if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); - } - - ipoib_demux(ifp, m, ntohs(eh->proto)); - CURVNET_RESTORE(); -} - -static int -ipoib_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, - struct sockaddr *sa) -{ - struct sockaddr_dl *sdl; -#ifdef INET - struct sockaddr_in *sin; -#endif -#ifdef INET6 - struct sockaddr_in6 *sin6; -#endif - u_char *e_addr; - - switch(sa->sa_family) { - case AF_LINK: - /* - * No mapping needed. Just check that it's a valid MC address. - */ - sdl = (struct sockaddr_dl *)sa; - e_addr = LLADDR(sdl); - if (!IPOIB_IS_MULTICAST(e_addr)) - return EADDRNOTAVAIL; - *llsa = NULL; - return 0; - -#ifdef INET - case AF_INET: - sin = (struct sockaddr_in *)sa; - if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) - return EADDRNOTAVAIL; - sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND); - sdl->sdl_alen = INFINIBAND_ALEN; - e_addr = LLADDR(sdl); - ip_ib_mc_map(sin->sin_addr.s_addr, ifp->if_broadcastaddr, - e_addr); - *llsa = (struct sockaddr *)sdl; - return 0; -#endif -#ifdef INET6 - case AF_INET6: - sin6 = (struct sockaddr_in6 *)sa; - /* - * An IP6 address of 0 means listen to all - * of the multicast address used for IP6. - * This has no meaning in ipoib. - */ - if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) - return EADDRNOTAVAIL; - if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) - return EADDRNOTAVAIL; - sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND); - sdl->sdl_alen = INFINIBAND_ALEN; - e_addr = LLADDR(sdl); - ipv6_ib_mc_map(&sin6->sin6_addr, ifp->if_broadcastaddr, e_addr); - *llsa = (struct sockaddr *)sdl; - return 0; -#endif - - default: - return EAFNOSUPPORT; - } -} - module_init_order(ipoib_init_module, SI_ORDER_FIFTH); module_exit_order(ipoib_cleanup_module, SI_ORDER_FIFTH); @@ -1771,4 +1433,5 @@ static moduledata_t ipoib_mod = { DECLARE_MODULE(ipoib, ipoib_mod, SI_SUB_LAST, SI_ORDER_ANY); MODULE_DEPEND(ipoib, ibcore, 1, 1, 1); +MODULE_DEPEND(ipoib, if_infiniband, 1, 1, 1); MODULE_DEPEND(ipoib, linuxkpi, 1, 1, 1);