Remove no longer supported mthca driver.

Sponsored by:	Mellanox Technologies
Hans Petter Selasky 2017-11-13 10:59:38 +00:00
parent 8cc487045e
commit 8dee9a7a44
40 changed files with 0 additions and 16017 deletions
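
Note for users of custom kernel configurations: after this change
config(8) no longer recognizes the mthca device, so configurations that
still carry it must drop the line. A hypothetical excerpt after the
removal:

	device		mlx4		# Shared code module between IB and Ethernet
	device		mlx4ib		# Mellanox ConnectX HCA InfiniBand
	device		mlx4en		# Mellanox ConnectX HCA Ethernet
	#device	mthca		# removed, no longer supported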

share/mk/bsd.libnames.mk

@@ -109,7 +109,6 @@ LIBMILTER?= ${LIBDESTDIR}${LIBDIR_BASE}/libmilter.a
 LIBMLX4?= ${LIBDESTDIR}${LIBDIR_BASE}/libmlx4.a
 LIBMP?= ${LIBDESTDIR}${LIBDIR_BASE}/libmp.a
 LIBMT?= ${LIBDESTDIR}${LIBDIR_BASE}/libmt.a
-LIBMTHCA?= ${LIBDESTDIR}${LIBDIR_BASE}/libmthca.a
 LIBNANDFS?= ${LIBDESTDIR}${LIBDIR_BASE}/libnandfs.a
 LIBNCURSES?= ${LIBDESTDIR}${LIBDIR_BASE}/libncurses.a
 LIBNCURSESW?= ${LIBDESTDIR}${LIBDIR_BASE}/libncursesw.a

share/mk/src.libnames.mk

@@ -199,7 +199,6 @@ _LIBRARIES+= \
 ibumad \
 ibverbs \
 mlx4 \
-mthca \
 opensm \
 osmcomp \
 osmvendor \
@@ -336,7 +335,6 @@ _DP_ibcm= ibverbs
 _DP_ibmad= ibcommon ibumad
 _DP_ibumad= ibcommon
 _DP_mlx4= ibverbs pthread
-_DP_mthca= ibverbs pthread
 _DP_opensm= pthread
 _DP_osmcomp= pthread
 _DP_osmvendor= ibumad opensm osmcomp pthread
@@ -488,7 +486,6 @@ LIBIBMADDIR= ${OBJTOP}/contrib/ofed/usr.lib/libibmad
 LIBIBUMADDIR= ${OBJTOP}/contrib/ofed/usr.lib/libibumad
 LIBIBVERBSDIR= ${OBJTOP}/contrib/ofed/usr.lib/libibverbs
 LIBMLX4DIR= ${OBJTOP}/contrib/ofed/usr.lib/libmlx4
-LIBMTHCADIR= ${OBJTOP}/contrib/ofed/usr.lib/libmthca
 LIBOPENSMDIR= ${OBJTOP}/contrib/ofed/usr.lib/libopensm
 LIBOSMCOMPDIR= ${OBJTOP}/contrib/ofed/usr.lib/libosmcomp
 LIBOSMVENDORDIR= ${OBJTOP}/contrib/ofed/usr.lib/libosmvendor

sys/amd64/conf/NOTES

@@ -317,7 +317,6 @@ options DRM_DEBUG # Include debug printfs (slow)
 # ixlv: Intel XL710 40Gbe VF PCIE Ethernet
 # mlx4ib: Mellanox ConnectX HCA InfiniBand
 # mlx4en: Mellanox ConnectX HCA Ethernet
-# mthca: Mellanox HCA InfiniBand
 # nfe: nVidia nForce MCP on-board Ethernet Networking (BSD open source)
 # sfxge: Solarflare SFC9000 family 10Gb Ethernet adapters
 # vmx: VMware VMXNET3 Ethernet (BSD open source)
@@ -338,7 +337,6 @@ device ixlv # Intel XL710 40Gbe VF PCIE Ethernet
 device mlx4 # Shared code module between IB and Ethernet
 device mlx4ib # Mellanox ConnectX HCA InfiniBand
 device mlx4en # Mellanox ConnectX HCA Ethernet
-device mthca # Mellanox HCA InfiniBand
 device nfe # nVidia nForce MCP on-board Ethernet
 device sfxge # Solarflare SFC9000 10Gb Ethernet
 device vmx # VMware VMXNET3 Ethernet

sys/conf/files

@@ -4696,43 +4696,6 @@ dev/mlx5/mlx5_en/mlx5_en_rx.c optional mlx5en pci inet inet6 \
 dev/mlx5/mlx5_en/mlx5_en_txrx.c optional mlx5en pci inet inet6 \
 	compile-with "${OFED_C}"
-ofed/drivers/infiniband/hw/mthca/mthca_allocator.c optional mthca \
-	compile-with "${OFED_C}"
-ofed/drivers/infiniband/hw/mthca/mthca_av.c optional mthca \
-	compile-with "${OFED_C}"
-ofed/drivers/infiniband/hw/mthca/mthca_catas.c optional mthca \
-	compile-with "${OFED_C}"
-ofed/drivers/infiniband/hw/mthca/mthca_cmd.c optional mthca \
-	compile-with "${OFED_C}"
-ofed/drivers/infiniband/hw/mthca/mthca_cq.c optional mthca \
-	compile-with "${OFED_C}"
-ofed/drivers/infiniband/hw/mthca/mthca_eq.c optional mthca \
-	compile-with "${OFED_C}"
-ofed/drivers/infiniband/hw/mthca/mthca_mad.c optional mthca \
-	compile-with "${OFED_C}"
-ofed/drivers/infiniband/hw/mthca/mthca_main.c optional mthca \
-	compile-with "${OFED_C}"
-ofed/drivers/infiniband/hw/mthca/mthca_mcg.c optional mthca \
-	compile-with "${OFED_C}"
-ofed/drivers/infiniband/hw/mthca/mthca_memfree.c optional mthca \
-	compile-with "${OFED_C}"
-ofed/drivers/infiniband/hw/mthca/mthca_mr.c optional mthca \
-	compile-with "${OFED_C}"
-ofed/drivers/infiniband/hw/mthca/mthca_pd.c optional mthca \
-	compile-with "${OFED_C}"
-ofed/drivers/infiniband/hw/mthca/mthca_profile.c optional mthca \
-	compile-with "${OFED_C}"
-ofed/drivers/infiniband/hw/mthca/mthca_provider.c optional mthca \
-	compile-with "${OFED_C}"
-ofed/drivers/infiniband/hw/mthca/mthca_qp.c optional mthca \
-	compile-with "${OFED_C}"
-ofed/drivers/infiniband/hw/mthca/mthca_reset.c optional mthca \
-	compile-with "${OFED_C}"
-ofed/drivers/infiniband/hw/mthca/mthca_srq.c optional mthca \
-	compile-with "${OFED_C}"
-ofed/drivers/infiniband/hw/mthca/mthca_uar.c optional mthca \
-	compile-with "${OFED_C}"
 # crypto support
 opencrypto/cast.c optional crypto | ipsec | ipsec_support
 opencrypto/criov.c optional crypto | ipsec | ipsec_support

sys/i386/conf/NOTES

@@ -555,7 +555,6 @@ hint.mse.0.irq="5"
 # Requires the iwn firmware module
 # mlx4ib: Mellanox ConnectX HCA InfiniBand
 # mlx4en: Mellanox ConnectX HCA Ethernet
-# mthca: Mellanox HCA InfiniBand
 # nfe: nVidia nForce MCP on-board Ethernet Networking (BSD open source)
 # sbni: Granch SBNI12-xx ISA and PCI adapters
 # vmx: VMware VMXNET3 Ethernet (BSD open source)
@@ -596,7 +595,6 @@ hint.le.0.drq="0"
 device mlx4 # Shared code module between IB and Ethernet
 device mlx4ib # Mellanox ConnectX HCA InfiniBand
 device mlx4en # Mellanox ConnectX HCA Ethernet
-device mthca # Mellanox HCA InfiniBand
 device nfe # nVidia nForce MCP on-board Ethernet
 device sbni
 hint.sbni.0.at="isa"

sys/modules/Makefile

@@ -264,7 +264,6 @@ SUBDIR= \
 msdosfs_iconv \
 ${_mse} \
 msk \
-${_mthca} \
 mvs \
 mwl \
 ${_mwlfw} \
@@ -680,9 +679,6 @@ _mlx4ib= mlx4ib
 _mlx5ib= mlx5ib
 .endif
 _mly= mly
-.if ${MK_OFED} != "no" || defined(ALL_MODULES)
-_mthca= mthca
-.endif
 _nfe= nfe
 _nvd= nvd
 _nvme= nvme

sys/modules/mthca/Makefile

@@ -1,17 +0,0 @@
# $FreeBSD$
.PATH: ${SRCTOP}/sys/ofed/drivers/infiniband/hw/mthca
KMOD = mthca
SRCS = device_if.h bus_if.h pci_if.h vnode_if.h
SRCS+= mthca_allocator.c mthca_av.c mthca_catas.c mthca_cmd.c mthca_cq.c
SRCS+= mthca_eq.c mthca_mad.c mthca_main.c mthca_mcg.c mthca_memfree.c
SRCS+= mthca_mr.c mthca_pd.c mthca_profile.c mthca_provider.c mthca_qp.c
SRCS+= mthca_reset.c mthca_srq.c mthca_uar.c
SRCS+= opt_inet.h opt_inet6.h
CFLAGS+= -I${SRCTOP}/sys/ofed/include
CFLAGS+= -I${SRCTOP}/sys/compat/linuxkpi/common/include
.include <bsd.kmod.mk>
CFLAGS+= -Wno-cast-qual -Wno-pointer-arith

sys/ofed/drivers/infiniband/Kconfig

@@ -1,66 +0,0 @@
menuconfig INFINIBAND
tristate "InfiniBand support"
depends on PCI || BROKEN
depends on HAS_IOMEM
---help---
Core support for InfiniBand (IB). Make sure to also select
any protocols you wish to use as well as drivers for your
InfiniBand hardware.
if INFINIBAND
config INFINIBAND_USER_MAD
tristate "InfiniBand userspace MAD support"
depends on INFINIBAND
---help---
Userspace InfiniBand Management Datagram (MAD) support. This
is the kernel side of the userspace MAD support, which allows
userspace processes to send and receive MADs. You will also
need libibumad from <http://www.openib.org>.
config INFINIBAND_USER_ACCESS
tristate "InfiniBand userspace access (verbs and CM)"
---help---
Userspace InfiniBand access support. This enables the
kernel side of userspace verbs and the userspace
communication manager (CM). This allows userspace processes
to set up connections and directly access InfiniBand
hardware for fast-path operations. You will also need
libibverbs, libibcm and a hardware driver library from
<http://www.openib.org>.
config INFINIBAND_USER_MEM
bool
depends on INFINIBAND_USER_ACCESS != n
default y
config INFINIBAND_ADDR_TRANS
bool
depends on INET
depends on !(INFINIBAND = y && IPV6 = m)
default y
source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/ipath/Kconfig"
source "drivers/infiniband/hw/qib/Kconfig"
source "drivers/infiniband/hw/ehca/Kconfig"
source "drivers/infiniband/hw/amso1100/Kconfig"
source "drivers/infiniband/hw/cxgb3/Kconfig"
source "drivers/infiniband/hw/mlx4/Kconfig"
source "drivers/infiniband/hw/nes/Kconfig"
source "drivers/infiniband/ulp/ipoib/Kconfig"
source "drivers/infiniband/ulp/srp/Kconfig"
source "drivers/infiniband/ulp/srpt/Kconfig"
source "drivers/infiniband/ulp/iser/Kconfig"
source "drivers/infiniband/ulp/sdp/Kconfig"
source "drivers/infiniband/ulp/qlgc_vnic/Kconfig"
source "drivers/infiniband/util/Kconfig"
endif # INFINIBAND

sys/ofed/drivers/infiniband/Makefile

@@ -1,17 +0,0 @@
obj-$(CONFIG_INFINIBAND) += core/
obj-$(CONFIG_INFINIBAND_MTHCA) += hw/mthca/
obj-$(CONFIG_INFINIBAND_IPATH) += hw/ipath/
obj-$(CONFIG_INFINIBAND_QIB) += hw/qib/
obj-$(CONFIG_INFINIBAND_EHCA) += hw/ehca/
obj-$(CONFIG_INFINIBAND_AMSO1100) += hw/amso1100/
obj-$(CONFIG_INFINIBAND_CXGB3) += hw/cxgb3/
obj-$(CONFIG_INFINIBAND_NES) += hw/nes/
obj-$(CONFIG_MLX4_INFINIBAND) += hw/mlx4/
obj-$(CONFIG_INFINIBAND_NES) += hw/nes/
obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/
obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/
obj-$(CONFIG_INFINIBAND_SRPT) += ulp/srpt/
obj-$(CONFIG_INFINIBAND_ISER) += ulp/iser/
obj-$(CONFIG_INFINIBAND_SDP) += ulp/sdp/
obj-$(CONFIG_INFINIBAND_QLGC_VNIC) += ulp/qlgc_vnic/
obj-$(CONFIG_INFINIBAND_MADEYE) += util/

sys/ofed/drivers/infiniband/hw/mthca/Kconfig

@@ -1,17 +0,0 @@
config INFINIBAND_MTHCA
tristate "Mellanox HCA support"
depends on PCI
---help---
This is a low-level driver for Mellanox InfiniHost host
channel adapters (HCAs), including the MT23108 PCI-X HCA
("Tavor") and the MT25208 PCI Express HCA ("Arbel").
config INFINIBAND_MTHCA_DEBUG
bool "Verbose debugging output" if EMBEDDED
depends on INFINIBAND_MTHCA
default y
---help---
This option causes debugging code to be compiled into the
mthca driver. The output can be turned on via the
debug_level module parameter (which can also be set after
the driver is loaded through sysfs).

sys/ofed/drivers/infiniband/hw/mthca/Makefile

@@ -1,7 +0,0 @@
obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o
ib_mthca-y := mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \
mthca_allocator.o mthca_eq.o mthca_pd.o mthca_cq.o \
mthca_mr.o mthca_qp.o mthca_av.o mthca_mcg.o mthca_mad.o \
mthca_provider.o mthca_memfree.o mthca_uar.o mthca_srq.o \
mthca_catas.o

sys/ofed/drivers/infiniband/hw/mthca/mthca_allocator.c

@@ -1,300 +0,0 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/errno.h>
#include <linux/slab.h>
#include "mthca_dev.h"
/* Trivial bitmap-based allocator */
u32 mthca_alloc(struct mthca_alloc *alloc)
{
unsigned long flags;
u32 obj;
spin_lock_irqsave(&alloc->lock, flags);
obj = find_next_zero_bit(alloc->table, alloc->max, alloc->last);
if (obj >= alloc->max) {
alloc->top = (alloc->top + alloc->max) & alloc->mask;
obj = find_first_zero_bit(alloc->table, alloc->max);
}
if (obj < alloc->max) {
set_bit(obj, alloc->table);
obj |= alloc->top;
} else
obj = -1;
spin_unlock_irqrestore(&alloc->lock, flags);
return obj;
}
void mthca_free(struct mthca_alloc *alloc, u32 obj)
{
unsigned long flags;
obj &= alloc->max - 1;
spin_lock_irqsave(&alloc->lock, flags);
clear_bit(obj, alloc->table);
alloc->last = min(alloc->last, obj);
alloc->top = (alloc->top + alloc->max) & alloc->mask;
spin_unlock_irqrestore(&alloc->lock, flags);
}
int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask,
u32 reserved)
{
int i;
/* num must be a power of 2 */
if (num != 1 << (ffs(num) - 1))
return -EINVAL;
alloc->last = 0;
alloc->top = 0;
alloc->max = num;
alloc->mask = mask;
spin_lock_init(&alloc->lock);
alloc->table = kmalloc(BITS_TO_LONGS(num) * sizeof (long),
GFP_KERNEL);
if (!alloc->table)
return -ENOMEM;
bitmap_zero(alloc->table, num);
for (i = 0; i < reserved; ++i)
set_bit(i, alloc->table);
return 0;
}
void mthca_alloc_cleanup(struct mthca_alloc *alloc)
{
kfree(alloc->table);
}
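/*
 * Usage sketch (illustrative only, not part of the original driver):
 * a table of 256 IDs, mask 255, with the first 16 reserved;
 * mthca_alloc() returns (u32) -1 when the table is exhausted.
 *
 *	struct mthca_alloc alloc;
 *	u32 obj;
 *
 *	if (mthca_alloc_init(&alloc, 256, 255, 16))
 *		return -ENOMEM;
 *	obj = mthca_alloc(&alloc);
 *	if (obj != (u32) -1)
 *		mthca_free(&alloc, obj);
 *	mthca_alloc_cleanup(&alloc);
 */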
/*
* Array of pointers with lazy allocation of leaf pages. Callers of
* _get, _set and _clear methods must use a lock or otherwise
* serialize access to the array.
*/
#define MTHCA_ARRAY_MASK (PAGE_SIZE / sizeof (void *) - 1)
void *mthca_array_get(struct mthca_array *array, int index)
{
int p = (index * sizeof (void *)) >> PAGE_SHIFT;
if (array->page_list[p].page)
return array->page_list[p].page[index & MTHCA_ARRAY_MASK];
else
return NULL;
}
int mthca_array_set(struct mthca_array *array, int index, void *value)
{
int p = (index * sizeof (void *)) >> PAGE_SHIFT;
/* Allocate with GFP_ATOMIC because we'll be called with locks held. */
if (!array->page_list[p].page)
array->page_list[p].page = (void **) get_zeroed_page(GFP_ATOMIC);
if (!array->page_list[p].page)
return -ENOMEM;
array->page_list[p].page[index & MTHCA_ARRAY_MASK] = value;
++array->page_list[p].used;
return 0;
}
void mthca_array_clear(struct mthca_array *array, int index)
{
int p = (index * sizeof (void *)) >> PAGE_SHIFT;
if (--array->page_list[p].used == 0) {
free_page((unsigned long) array->page_list[p].page);
array->page_list[p].page = NULL;
} else
array->page_list[p].page[index & MTHCA_ARRAY_MASK] = NULL;
if (array->page_list[p].used < 0)
pr_debug("Array %p index %d page %d with ref count %d < 0\n",
array, index, p, array->page_list[p].used);
}
int mthca_array_init(struct mthca_array *array, int nent)
{
int npage = (nent * sizeof (void *) + PAGE_SIZE - 1) / PAGE_SIZE;
int i;
array->page_list = kmalloc(npage * sizeof *array->page_list, GFP_KERNEL);
if (!array->page_list)
return -ENOMEM;
for (i = 0; i < npage; ++i) {
array->page_list[i].page = NULL;
array->page_list[i].used = 0;
}
return 0;
}
void mthca_array_cleanup(struct mthca_array *array, int nent)
{
int i;
for (i = 0; i < (nent * sizeof (void *) + PAGE_SIZE - 1) / PAGE_SIZE; ++i)
free_page((unsigned long) array->page_list[i].page);
kfree(array->page_list);
}
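/*
 * Usage sketch (illustrative only): the driver uses these arrays to map
 * small integer handles such as CQ numbers to objects; as noted above,
 * callers must serialize access themselves.
 *
 *	struct mthca_array arr;
 *
 *	mthca_array_init(&arr, 1024);
 *	mthca_array_set(&arr, cqn, cq);
 *	cq = mthca_array_get(&arr, cqn);
 *	mthca_array_clear(&arr, cqn);
 *	mthca_array_cleanup(&arr, 1024);
 */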
/*
* Handling for queue buffers -- we allocate a bunch of memory and
* register it in a memory region at HCA virtual address 0. If the
* requested size is > max_direct, we split the allocation into
* multiple pages, so we don't require too much contiguous memory.
*/
int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct,
union mthca_buf *buf, int *is_direct, struct mthca_pd *pd,
int hca_write, struct mthca_mr *mr)
{
int err = -ENOMEM;
int npages, shift;
u64 *dma_list = NULL;
dma_addr_t t;
int i;
if (size <= max_direct) {
*is_direct = 1;
npages = 1;
shift = get_order(size) + PAGE_SHIFT;
buf->direct.buf = dma_alloc_coherent(&dev->pdev->dev,
size, &t, GFP_KERNEL);
if (!buf->direct.buf)
return -ENOMEM;
pci_unmap_addr_set(&buf->direct, mapping, t);
memset(buf->direct.buf, 0, size);
while (t & ((1 << shift) - 1)) {
--shift;
npages *= 2;
}
dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
if (!dma_list)
goto err_free;
for (i = 0; i < npages; ++i)
dma_list[i] = t + i * (1 << shift);
} else {
*is_direct = 0;
npages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
shift = PAGE_SHIFT;
dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
if (!dma_list)
return -ENOMEM;
buf->page_list = kmalloc(npages * sizeof *buf->page_list,
GFP_KERNEL);
if (!buf->page_list)
goto err_out;
for (i = 0; i < npages; ++i)
buf->page_list[i].buf = NULL;
for (i = 0; i < npages; ++i) {
buf->page_list[i].buf =
dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
&t, GFP_KERNEL);
if (!buf->page_list[i].buf)
goto err_free;
dma_list[i] = t;
pci_unmap_addr_set(&buf->page_list[i], mapping, t);
clear_page(buf->page_list[i].buf);
}
}
err = mthca_mr_alloc_phys(dev, pd->pd_num,
dma_list, shift, npages,
0, size,
MTHCA_MPT_FLAG_LOCAL_READ |
(hca_write ? MTHCA_MPT_FLAG_LOCAL_WRITE : 0),
mr);
if (err)
goto err_free;
kfree(dma_list);
return 0;
err_free:
mthca_buf_free(dev, size, buf, *is_direct, NULL);
err_out:
kfree(dma_list);
return err;
}
void mthca_buf_free(struct mthca_dev *dev, int size, union mthca_buf *buf,
int is_direct, struct mthca_mr *mr)
{
int i;
if (mr)
mthca_free_mr(dev, mr);
if (is_direct)
dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf,
pci_unmap_addr(&buf->direct, mapping));
else {
for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i)
dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
buf->page_list[i].buf,
pci_unmap_addr(&buf->page_list[i],
mapping));
kfree(buf->page_list);
}
}

sys/ofed/drivers/infiniband/hw/mthca/mthca_av.c

@@ -1,374 +0,0 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/string.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
#include "mthca_dev.h"
enum {
MTHCA_RATE_TAVOR_FULL = 0,
MTHCA_RATE_TAVOR_1X = 1,
MTHCA_RATE_TAVOR_4X = 2,
MTHCA_RATE_TAVOR_1X_DDR = 3
};
enum {
MTHCA_RATE_MEMFREE_FULL = 0,
MTHCA_RATE_MEMFREE_QUARTER = 1,
MTHCA_RATE_MEMFREE_EIGHTH = 2,
MTHCA_RATE_MEMFREE_HALF = 3
};
struct mthca_av {
__be32 port_pd;
u8 reserved1;
u8 g_slid;
__be16 dlid;
u8 reserved2;
u8 gid_index;
u8 msg_sr;
u8 hop_limit;
__be32 sl_tclass_flowlabel;
__be32 dgid[4];
};
static enum ib_rate memfree_rate_to_ib(u8 mthca_rate, u8 port_rate)
{
switch (mthca_rate) {
case MTHCA_RATE_MEMFREE_EIGHTH:
return mult_to_ib_rate(port_rate >> 3);
case MTHCA_RATE_MEMFREE_QUARTER:
return mult_to_ib_rate(port_rate >> 2);
case MTHCA_RATE_MEMFREE_HALF:
return mult_to_ib_rate(port_rate >> 1);
case MTHCA_RATE_MEMFREE_FULL:
default:
return mult_to_ib_rate(port_rate);
}
}
static enum ib_rate tavor_rate_to_ib(u8 mthca_rate, u8 port_rate)
{
switch (mthca_rate) {
case MTHCA_RATE_TAVOR_1X: return IB_RATE_2_5_GBPS;
case MTHCA_RATE_TAVOR_1X_DDR: return IB_RATE_5_GBPS;
case MTHCA_RATE_TAVOR_4X: return IB_RATE_10_GBPS;
default: return mult_to_ib_rate(port_rate);
}
}
enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port)
{
if (mthca_is_memfree(dev)) {
/* Handle old Arbel FW */
if (dev->limits.stat_rate_support == 0x3 && mthca_rate)
return IB_RATE_2_5_GBPS;
return memfree_rate_to_ib(mthca_rate, dev->rate[port - 1]);
} else
return tavor_rate_to_ib(mthca_rate, dev->rate[port - 1]);
}
static u8 ib_rate_to_memfree(u8 req_rate, u8 cur_rate)
{
if (cur_rate <= req_rate)
return 0;
/*
* Inter-packet delay (IPD) to get from rate X down to a rate
* no more than Y is (X - 1) / Y.
*/
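/*
 * Worked example (illustrative): dropping from 4X (cur_rate = 4) to 1X
 * (req_rate = 1) gives (4 - 1) / 1 = 3, which maps to
 * MTHCA_RATE_MEMFREE_QUARTER below.
 */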
switch ((cur_rate - 1) / req_rate) {
case 0: return MTHCA_RATE_MEMFREE_FULL;
case 1: return MTHCA_RATE_MEMFREE_HALF;
case 2: /* fall through */
case 3: return MTHCA_RATE_MEMFREE_QUARTER;
default: return MTHCA_RATE_MEMFREE_EIGHTH;
}
}
static u8 ib_rate_to_tavor(u8 static_rate)
{
switch (static_rate) {
case IB_RATE_2_5_GBPS: return MTHCA_RATE_TAVOR_1X;
case IB_RATE_5_GBPS: return MTHCA_RATE_TAVOR_1X_DDR;
case IB_RATE_10_GBPS: return MTHCA_RATE_TAVOR_4X;
default: return MTHCA_RATE_TAVOR_FULL;
}
}
u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port)
{
u8 rate;
if (!static_rate || ib_rate_to_mult(static_rate) >= dev->rate[port - 1])
return 0;
if (mthca_is_memfree(dev))
rate = ib_rate_to_memfree(ib_rate_to_mult(static_rate),
dev->rate[port - 1]);
else
rate = ib_rate_to_tavor(static_rate);
if (!(dev->limits.stat_rate_support & (1 << rate)))
rate = 1;
return rate;
}
int mthca_create_ah(struct mthca_dev *dev,
struct mthca_pd *pd,
struct ib_ah_attr *ah_attr,
struct mthca_ah *ah)
{
u32 index = -1;
struct mthca_av *av = NULL;
ah->type = MTHCA_AH_PCI_POOL;
if (mthca_is_memfree(dev)) {
ah->av = kmalloc(sizeof *ah->av, GFP_ATOMIC);
if (!ah->av)
return -ENOMEM;
ah->type = MTHCA_AH_KMALLOC;
av = ah->av;
} else if (!atomic_read(&pd->sqp_count) &&
!(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) {
index = mthca_alloc(&dev->av_table.alloc);
/* fall back to allocate in host memory */
if (index == -1)
goto on_hca_fail;
av = kmalloc(sizeof *av, GFP_ATOMIC);
if (!av)
goto on_hca_fail;
ah->type = MTHCA_AH_ON_HCA;
ah->avdma = dev->av_table.ddr_av_base +
index * MTHCA_AV_SIZE;
}
on_hca_fail:
if (ah->type == MTHCA_AH_PCI_POOL) {
ah->av = pci_pool_alloc(dev->av_table.pool,
GFP_ATOMIC, &ah->avdma);
if (!ah->av)
return -ENOMEM;
av = ah->av;
}
ah->key = pd->ntmr.ibmr.lkey;
memset(av, 0, MTHCA_AV_SIZE);
av->port_pd = cpu_to_be32(pd->pd_num | (ah_attr->port_num << 24));
av->g_slid = ah_attr->src_path_bits;
av->dlid = cpu_to_be16(ah_attr->dlid);
av->msg_sr = (3 << 4) | /* 2K message */
mthca_get_rate(dev, ah_attr->static_rate, ah_attr->port_num);
av->sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
if (ah_attr->ah_flags & IB_AH_GRH) {
av->g_slid |= 0x80;
av->gid_index = (ah_attr->port_num - 1) * dev->limits.gid_table_len +
ah_attr->grh.sgid_index;
av->hop_limit = ah_attr->grh.hop_limit;
av->sl_tclass_flowlabel |=
cpu_to_be32((ah_attr->grh.traffic_class << 20) |
ah_attr->grh.flow_label);
memcpy(av->dgid, ah_attr->grh.dgid.raw, 16);
} else {
/* Arbel workaround -- low byte of GID must be 2 */
av->dgid[3] = cpu_to_be32(2);
}
if (0) {
int j;
mthca_dbg(dev, "Created UDAV at %p/%08lx:\n",
av, (unsigned long) ah->avdma);
for (j = 0; j < 8; ++j)
printk(KERN_DEBUG " [%2x] %08x\n",
j * 4, be32_to_cpu(((__be32 *) av)[j]));
}
if (ah->type == MTHCA_AH_ON_HCA) {
memcpy_toio(dev->av_table.av_map + index * MTHCA_AV_SIZE,
av, MTHCA_AV_SIZE);
kfree(av);
}
return 0;
}
int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah)
{
switch (ah->type) {
case MTHCA_AH_ON_HCA:
mthca_free(&dev->av_table.alloc,
(ah->avdma - dev->av_table.ddr_av_base) /
MTHCA_AV_SIZE);
break;
case MTHCA_AH_PCI_POOL:
pci_pool_free(dev->av_table.pool, ah->av, ah->avdma);
break;
case MTHCA_AH_KMALLOC:
kfree(ah->av);
break;
}
return 0;
}
int mthca_ah_grh_present(struct mthca_ah *ah)
{
return !!(ah->av->g_slid & 0x80);
}
int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
struct ib_ud_header *header)
{
if (ah->type == MTHCA_AH_ON_HCA)
return -EINVAL;
header->lrh.service_level = be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28;
header->lrh.destination_lid = ah->av->dlid;
header->lrh.source_lid = cpu_to_be16(ah->av->g_slid & 0x7f);
if (mthca_ah_grh_present(ah)) {
header->grh.traffic_class =
(be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20) & 0xff;
header->grh.flow_label =
ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff);
header->grh.hop_limit = ah->av->hop_limit;
ib_get_cached_gid(&dev->ib_dev,
be32_to_cpu(ah->av->port_pd) >> 24,
ah->av->gid_index % dev->limits.gid_table_len,
&header->grh.source_gid);
memcpy(header->grh.destination_gid.raw,
ah->av->dgid, 16);
}
return 0;
}
int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr)
{
struct mthca_ah *ah = to_mah(ibah);
struct mthca_dev *dev = to_mdev(ibah->device);
/* Only implement for MAD and memfree ah for now. */
if (ah->type == MTHCA_AH_ON_HCA)
return -ENOSYS;
memset(attr, 0, sizeof *attr);
attr->dlid = be16_to_cpu(ah->av->dlid);
attr->sl = be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28;
attr->port_num = be32_to_cpu(ah->av->port_pd) >> 24;
attr->static_rate = mthca_rate_to_ib(dev, ah->av->msg_sr & 0x7,
attr->port_num);
attr->src_path_bits = ah->av->g_slid & 0x7F;
attr->ah_flags = mthca_ah_grh_present(ah) ? IB_AH_GRH : 0;
if (attr->ah_flags) {
attr->grh.traffic_class =
be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20;
attr->grh.flow_label =
be32_to_cpu(ah->av->sl_tclass_flowlabel) & 0xfffff;
attr->grh.hop_limit = ah->av->hop_limit;
attr->grh.sgid_index = ah->av->gid_index &
(dev->limits.gid_table_len - 1);
memcpy(attr->grh.dgid.raw, ah->av->dgid, 16);
}
return 0;
}
int mthca_init_av_table(struct mthca_dev *dev)
{
int err;
if (mthca_is_memfree(dev))
return 0;
err = mthca_alloc_init(&dev->av_table.alloc,
dev->av_table.num_ddr_avs,
dev->av_table.num_ddr_avs - 1,
0);
if (err)
return err;
dev->av_table.pool = pci_pool_create("mthca_av", dev->pdev,
MTHCA_AV_SIZE,
MTHCA_AV_SIZE, 0);
if (!dev->av_table.pool)
goto out_free_alloc;
if (!(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) {
dev->av_table.av_map = ioremap(pci_resource_start(dev->pdev, 4) +
dev->av_table.ddr_av_base -
dev->ddr_start,
dev->av_table.num_ddr_avs *
MTHCA_AV_SIZE);
if (!dev->av_table.av_map)
goto out_free_pool;
} else
dev->av_table.av_map = NULL;
return 0;
out_free_pool:
pci_pool_destroy(dev->av_table.pool);
out_free_alloc:
mthca_alloc_cleanup(&dev->av_table.alloc);
return -ENOMEM;
}
void mthca_cleanup_av_table(struct mthca_dev *dev)
{
if (mthca_is_memfree(dev))
return;
if (dev->av_table.av_map)
iounmap(dev->av_table.av_map);
pci_pool_destroy(dev->av_table.pool);
mthca_alloc_cleanup(&dev->av_table.alloc);
}

sys/ofed/drivers/infiniband/hw/mthca/mthca_catas.c

@@ -1,200 +0,0 @@
/*
* Copyright (c) 2005 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#define LINUXKPI_PARAM_PREFIX mthca_
#include <linux/jiffies.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include "mthca_dev.h"
enum {
MTHCA_CATAS_TYPE_INTERNAL = 0,
MTHCA_CATAS_TYPE_UPLINK = 3,
MTHCA_CATAS_TYPE_DDR = 4,
MTHCA_CATAS_TYPE_PARITY = 5,
};
#define MTHCA_CATAS_POLL_INTERVAL (5 * HZ)
static DEFINE_SPINLOCK(catas_lock);
static LIST_HEAD(catas_list);
static struct workqueue_struct *catas_wq;
static struct work_struct catas_work;
static int catas_reset_disable;
module_param_named(catas_reset_disable, catas_reset_disable, int, 0644);
MODULE_PARM_DESC(catas_reset_disable, "disable reset on catastrophic event if nonzero");
static void catas_reset(struct work_struct *work)
{
struct mthca_dev *dev, *tmpdev;
LIST_HEAD(tlist);
int ret;
mutex_lock(&mthca_device_mutex);
spin_lock_irq(&catas_lock);
list_splice_init(&catas_list, &tlist);
spin_unlock_irq(&catas_lock);
list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) {
struct pci_dev *pdev = dev->pdev;
ret = __mthca_restart_one(dev->pdev);
/* 'dev' now is not valid */
if (ret)
printk(KERN_ERR "mthca %s: Reset failed (%d)\n",
pci_name(pdev), ret);
else {
struct mthca_dev *d = pci_get_drvdata(pdev);
mthca_dbg(d, "Reset succeeded\n");
}
}
mutex_unlock(&mthca_device_mutex);
}
static void handle_catas(struct mthca_dev *dev)
{
struct ib_event event;
unsigned long flags;
const char *type;
int i;
event.device = &dev->ib_dev;
event.event = IB_EVENT_DEVICE_FATAL;
event.element.port_num = 0;
dev->active = 0;
ib_dispatch_event(&event);
switch (swab32(readl(dev->catas_err.map)) >> 24) {
case MTHCA_CATAS_TYPE_INTERNAL:
type = "internal error";
break;
case MTHCA_CATAS_TYPE_UPLINK:
type = "uplink bus error";
break;
case MTHCA_CATAS_TYPE_DDR:
type = "DDR data error";
break;
case MTHCA_CATAS_TYPE_PARITY:
type = "internal parity error";
break;
default:
type = "unknown error";
break;
}
mthca_err(dev, "Catastrophic error detected: %s\n", type);
for (i = 0; i < dev->catas_err.size; ++i)
mthca_err(dev, " buf[%02x]: %08x\n",
i, swab32(readl(dev->catas_err.map + i)));
if (catas_reset_disable)
return;
spin_lock_irqsave(&catas_lock, flags);
list_add(&dev->catas_err.list, &catas_list);
queue_work(catas_wq, &catas_work);
spin_unlock_irqrestore(&catas_lock, flags);
}
static void poll_catas(unsigned long dev_ptr)
{
struct mthca_dev *dev = (struct mthca_dev *) dev_ptr;
int i;
for (i = 0; i < dev->catas_err.size; ++i)
if (readl(dev->catas_err.map + i)) {
handle_catas(dev);
return;
}
mod_timer(&dev->catas_err.timer,
round_jiffies(jiffies + MTHCA_CATAS_POLL_INTERVAL));
}
void mthca_start_catas_poll(struct mthca_dev *dev)
{
unsigned long addr;
init_timer(&dev->catas_err.timer);
dev->catas_err.map = NULL;
addr = pci_resource_start(dev->pdev, 0) +
((pci_resource_len(dev->pdev, 0) - 1) &
dev->catas_err.addr);
dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4);
if (!dev->catas_err.map) {
mthca_warn(dev, "couldn't map catastrophic error region "
"at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4);
return;
}
dev->catas_err.timer.data = (unsigned long) dev;
dev->catas_err.timer.function = poll_catas;
dev->catas_err.timer.expires = jiffies + MTHCA_CATAS_POLL_INTERVAL;
INIT_LIST_HEAD(&dev->catas_err.list);
add_timer(&dev->catas_err.timer);
}
void mthca_stop_catas_poll(struct mthca_dev *dev)
{
del_timer_sync(&dev->catas_err.timer);
if (dev->catas_err.map)
iounmap(dev->catas_err.map);
spin_lock_irq(&catas_lock);
list_del(&dev->catas_err.list);
spin_unlock_irq(&catas_lock);
}
int __init mthca_catas_init(void)
{
INIT_WORK(&catas_work, catas_reset);
catas_wq = create_singlethread_workqueue("mthcacatas");
if (!catas_wq)
return -ENOMEM;
return 0;
}
void mthca_catas_cleanup(void)
{
destroy_workqueue(catas_wq);
}

sys/ofed/drivers/infiniband/hw/mthca/mthca_cmd.c

(file diff suppressed because it is too large)

sys/ofed/drivers/infiniband/hw/mthca/mthca_cmd.h

@@ -1,341 +0,0 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2006 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MTHCA_CMD_H
#define MTHCA_CMD_H
#include <rdma/ib_verbs.h>
#define MTHCA_MAILBOX_SIZE 4096
enum {
/* command completed successfully: */
MTHCA_CMD_STAT_OK = 0x00,
/* Internal error (such as a bus error) occurred while processing command: */
MTHCA_CMD_STAT_INTERNAL_ERR = 0x01,
/* Operation/command not supported or opcode modifier not supported: */
MTHCA_CMD_STAT_BAD_OP = 0x02,
/* Parameter not supported or parameter out of range: */
MTHCA_CMD_STAT_BAD_PARAM = 0x03,
/* System not enabled or bad system state: */
MTHCA_CMD_STAT_BAD_SYS_STATE = 0x04,
/* Attempt to access reserved or unallocated resource: */
MTHCA_CMD_STAT_BAD_RESOURCE = 0x05,
/* Requested resource is currently executing a command, or is otherwise busy: */
MTHCA_CMD_STAT_RESOURCE_BUSY = 0x06,
/* memory error: */
MTHCA_CMD_STAT_DDR_MEM_ERR = 0x07,
/* Required capability exceeds device limits: */
MTHCA_CMD_STAT_EXCEED_LIM = 0x08,
/* Resource is not in the appropriate state or ownership: */
MTHCA_CMD_STAT_BAD_RES_STATE = 0x09,
/* Index out of range: */
MTHCA_CMD_STAT_BAD_INDEX = 0x0a,
/* FW image corrupted: */
MTHCA_CMD_STAT_BAD_NVMEM = 0x0b,
/* Attempt to modify a QP/EE which is not in the presumed state: */
MTHCA_CMD_STAT_BAD_QPEE_STATE = 0x10,
/* Bad segment parameters (Address/Size): */
MTHCA_CMD_STAT_BAD_SEG_PARAM = 0x20,
/* Memory Region has Memory Windows bound to: */
MTHCA_CMD_STAT_REG_BOUND = 0x21,
/* HCA local attached memory not present: */
MTHCA_CMD_STAT_LAM_NOT_PRE = 0x22,
/* Bad management packet (silently discarded): */
MTHCA_CMD_STAT_BAD_PKT = 0x30,
/* More outstanding CQEs in CQ than new CQ size: */
MTHCA_CMD_STAT_BAD_SIZE = 0x40
};
enum {
MTHCA_TRANS_INVALID = 0,
MTHCA_TRANS_RST2INIT,
MTHCA_TRANS_INIT2INIT,
MTHCA_TRANS_INIT2RTR,
MTHCA_TRANS_RTR2RTS,
MTHCA_TRANS_RTS2RTS,
MTHCA_TRANS_SQERR2RTS,
MTHCA_TRANS_ANY2ERR,
MTHCA_TRANS_RTS2SQD,
MTHCA_TRANS_SQD2SQD,
MTHCA_TRANS_SQD2RTS,
MTHCA_TRANS_ANY2RST,
};
enum {
DEV_LIM_FLAG_RC = 1 << 0,
DEV_LIM_FLAG_UC = 1 << 1,
DEV_LIM_FLAG_UD = 1 << 2,
DEV_LIM_FLAG_RD = 1 << 3,
DEV_LIM_FLAG_RAW_IPV6 = 1 << 4,
DEV_LIM_FLAG_RAW_ETHER = 1 << 5,
DEV_LIM_FLAG_SRQ = 1 << 6,
DEV_LIM_FLAG_IPOIB_CSUM = 1 << 7,
DEV_LIM_FLAG_BAD_PKEY_CNTR = 1 << 8,
DEV_LIM_FLAG_BAD_QKEY_CNTR = 1 << 9,
DEV_LIM_FLAG_MW = 1 << 16,
DEV_LIM_FLAG_AUTO_PATH_MIG = 1 << 17,
DEV_LIM_FLAG_ATOMIC = 1 << 18,
DEV_LIM_FLAG_RAW_MULTI = 1 << 19,
DEV_LIM_FLAG_UD_AV_PORT_ENFORCE = 1 << 20,
DEV_LIM_FLAG_UD_MULTI = 1 << 21,
};
enum {
DIAG_RPRT_Q_XPRT_CIERR = 2,
DIAG_RPRT_QR_XPRT_CIERR = 3,
DIAG_RPRT_Q_PERF = 4,
DIAG_RPRT_QR_PERF = 5,
DIAG_RPRT_Q_MISC = 6,
DIAG_RPRT_QR_MISC = 7,
};
struct mthca_mailbox {
dma_addr_t dma;
void *buf;
};
struct mthca_dev_lim {
int max_srq_sz;
int max_qp_sz;
int reserved_qps;
int max_qps;
int reserved_srqs;
int max_srqs;
int reserved_eecs;
int max_eecs;
int max_cq_sz;
int reserved_cqs;
int max_cqs;
int max_mpts;
int reserved_eqs;
int max_eqs;
int reserved_mtts;
int max_mrw_sz;
int reserved_mrws;
int max_mtt_seg;
int max_requester_per_qp;
int max_responder_per_qp;
int max_rdma_global;
int local_ca_ack_delay;
int max_mtu;
int max_port_width;
int max_vl;
int num_ports;
int max_gids;
u16 stat_rate_support;
int max_pkeys;
u32 flags;
int reserved_uars;
int uar_size;
int min_page_sz;
int max_sg;
int max_desc_sz;
int max_qp_per_mcg;
int reserved_mgms;
int max_mcgs;
int reserved_pds;
int max_pds;
int reserved_rdds;
int max_rdds;
int eec_entry_sz;
int qpc_entry_sz;
int eeec_entry_sz;
int eqpc_entry_sz;
int eqc_entry_sz;
int cqc_entry_sz;
int srq_entry_sz;
int uar_scratch_entry_sz;
int mpt_entry_sz;
union {
struct {
int max_avs;
} tavor;
struct {
int resize_srq;
int max_pbl_sz;
u8 bmme_flags;
u32 reserved_lkey;
int lam_required;
u64 max_icm_sz;
} arbel;
} hca;
};
struct mthca_adapter {
u32 vendor_id;
u32 device_id;
u32 revision_id;
char board_id[MTHCA_BOARD_ID_LEN];
u8 inta_pin;
};
struct mthca_init_hca_param {
u64 qpc_base;
u64 eec_base;
u64 srqc_base;
u64 cqc_base;
u64 eqpc_base;
u64 eeec_base;
u64 eqc_base;
u64 rdb_base;
u64 mc_base;
u64 mpt_base;
u64 mtt_base;
u64 uar_scratch_base;
u64 uarc_base;
u16 log_mc_entry_sz;
u16 mc_hash_sz;
u8 log_num_qps;
u8 log_num_eecs;
u8 log_num_srqs;
u8 log_num_cqs;
u8 log_num_eqs;
u8 log_mc_table_sz;
u8 mtt_seg_sz;
u8 log_mpt_sz;
u8 log_uar_sz;
u8 log_uarc_sz;
};
struct mthca_init_ib_param {
int port_width;
int vl_cap;
int mtu_cap;
u16 gid_cap;
u16 pkey_cap;
int set_guid0;
u64 guid0;
int set_node_guid;
u64 node_guid;
int set_si_guid;
u64 si_guid;
};
struct mthca_set_ib_param {
int set_si_guid;
int reset_qkey_viol;
u64 si_guid;
u32 cap_mask;
};
int mthca_cmd_init(struct mthca_dev *dev);
void mthca_cmd_cleanup(struct mthca_dev *dev);
int mthca_cmd_use_events(struct mthca_dev *dev);
void mthca_cmd_use_polling(struct mthca_dev *dev);
void mthca_cmd_event(struct mthca_dev *dev, u16 token,
u8 status, u64 out_param);
struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev,
gfp_t gfp_mask);
void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox);
int mthca_SYS_EN(struct mthca_dev *dev, u8 *status);
int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status);
int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status);
int mthca_UNMAP_FA(struct mthca_dev *dev, u8 *status);
int mthca_RUN_FW(struct mthca_dev *dev, u8 *status);
int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status);
int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status);
int mthca_DISABLE_LAM(struct mthca_dev *dev, u8 *status);
int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status);
int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
struct mthca_dev_lim *dev_lim, u8 *status);
int mthca_QUERY_ADAPTER(struct mthca_dev *dev,
struct mthca_adapter *adapter, u8 *status);
int mthca_INIT_HCA(struct mthca_dev *dev,
struct mthca_init_hca_param *param,
u8 *status);
int mthca_INIT_IB(struct mthca_dev *dev,
struct mthca_init_ib_param *param,
int port, u8 *status);
int mthca_CLOSE_IB(struct mthca_dev *dev, int port, u8 *status);
int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic, u8 *status);
int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
int port, u8 *status);
int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *status);
int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status);
int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count, u8 *status);
int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status);
int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev, u8 *status);
int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages,
u8 *status);
int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int mpt_index, u8 *status);
int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int mpt_index, u8 *status);
int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int num_mtt, u8 *status);
int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status);
int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
int eq_num, u8 *status);
int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int eq_num, u8 *status);
int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int eq_num, u8 *status);
int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int cq_num, u8 *status);
int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int cq_num, u8 *status);
int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size,
u8 *status);
int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int srq_num, u8 *status);
int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int srq_num, u8 *status);
int mthca_QUERY_SRQ(struct mthca_dev *dev, u32 num,
struct mthca_mailbox *mailbox, u8 *status);
int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 *status);
int mthca_MODIFY_QP(struct mthca_dev *dev, enum ib_qp_state cur,
enum ib_qp_state next, u32 num, int is_ee,
struct mthca_mailbox *mailbox, u32 optmask,
u8 *status);
int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee,
struct mthca_mailbox *mailbox, u8 *status);
int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
u8 *status);
int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
void *in_mad, void *response_mad, u8 *status);
int mthca_READ_MGM(struct mthca_dev *dev, int index,
struct mthca_mailbox *mailbox, u8 *status);
int mthca_WRITE_MGM(struct mthca_dev *dev, int index,
struct mthca_mailbox *mailbox, u8 *status);
int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
u16 *hash, u8 *status);
int mthca_DIAG_RPRT(struct mthca_dev *dev, int mod,
struct mthca_mailbox *mailbox, u8 *status);
int mthca_NOP(struct mthca_dev *dev, u8 *status);
#endif /* MTHCA_CMD_H */

sys/ofed/drivers/infiniband/hw/mthca/mthca_config_reg.h

@@ -1,50 +0,0 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MTHCA_CONFIG_REG_H
#define MTHCA_CONFIG_REG_H
#include <linux/page.h>
#define MTHCA_HCR_BASE 0x80680
#define MTHCA_HCR_SIZE 0x0001c
#define MTHCA_ECR_BASE 0x80700
#define MTHCA_ECR_SIZE 0x00008
#define MTHCA_ECR_CLR_BASE 0x80708
#define MTHCA_ECR_CLR_SIZE 0x00008
#define MTHCA_MAP_ECR_SIZE (MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE)
#define MTHCA_CLR_INT_BASE 0xf00d8
#define MTHCA_CLR_INT_SIZE 0x00008
#define MTHCA_EQ_SET_CI_SIZE (8 * 32)
#endif /* MTHCA_CONFIG_REG_H */

sys/ofed/drivers/infiniband/hw/mthca/mthca_cq.c

@@ -1,992 +0,0 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/hardirq.h>
#include <linux/sched.h>
#include <asm/io.h>
#include <rdma/ib_pack.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
#include "mthca_memfree.h"
enum {
MTHCA_MAX_DIRECT_CQ_SIZE = 4 * PAGE_SIZE
};
enum {
MTHCA_CQ_ENTRY_SIZE = 0x20
};
enum {
MTHCA_ATOMIC_BYTE_LEN = 8
};
/*
* Must be packed because start is 64 bits but only aligned to 32 bits.
*/
struct mthca_cq_context {
__be32 flags;
__be64 start;
__be32 logsize_usrpage;
__be32 error_eqn; /* Tavor only */
__be32 comp_eqn;
__be32 pd;
__be32 lkey;
__be32 last_notified_index;
__be32 solicit_producer_index;
__be32 consumer_index;
__be32 producer_index;
__be32 cqn;
__be32 ci_db; /* Arbel only */
__be32 state_db; /* Arbel only */
u32 reserved;
} __attribute__((packed));
#define MTHCA_CQ_STATUS_OK ( 0 << 28)
#define MTHCA_CQ_STATUS_OVERFLOW ( 9 << 28)
#define MTHCA_CQ_STATUS_WRITE_FAIL (10 << 28)
#define MTHCA_CQ_FLAG_TR ( 1 << 18)
#define MTHCA_CQ_FLAG_OI ( 1 << 17)
#define MTHCA_CQ_STATE_DISARMED ( 0 << 8)
#define MTHCA_CQ_STATE_ARMED ( 1 << 8)
#define MTHCA_CQ_STATE_ARMED_SOL ( 4 << 8)
#define MTHCA_EQ_STATE_FIRED (10 << 8)
enum {
MTHCA_ERROR_CQE_OPCODE_MASK = 0xfe
};
enum {
SYNDROME_LOCAL_LENGTH_ERR = 0x01,
SYNDROME_LOCAL_QP_OP_ERR = 0x02,
SYNDROME_LOCAL_EEC_OP_ERR = 0x03,
SYNDROME_LOCAL_PROT_ERR = 0x04,
SYNDROME_WR_FLUSH_ERR = 0x05,
SYNDROME_MW_BIND_ERR = 0x06,
SYNDROME_BAD_RESP_ERR = 0x10,
SYNDROME_LOCAL_ACCESS_ERR = 0x11,
SYNDROME_REMOTE_INVAL_REQ_ERR = 0x12,
SYNDROME_REMOTE_ACCESS_ERR = 0x13,
SYNDROME_REMOTE_OP_ERR = 0x14,
SYNDROME_RETRY_EXC_ERR = 0x15,
SYNDROME_RNR_RETRY_EXC_ERR = 0x16,
SYNDROME_LOCAL_RDD_VIOL_ERR = 0x20,
SYNDROME_REMOTE_INVAL_RD_REQ_ERR = 0x21,
SYNDROME_REMOTE_ABORTED_ERR = 0x22,
SYNDROME_INVAL_EECN_ERR = 0x23,
SYNDROME_INVAL_EEC_STATE_ERR = 0x24
};
struct mthca_cqe {
__be32 my_qpn;
__be32 my_ee;
__be32 rqpn;
u8 sl_ipok;
u8 g_mlpath;
__be16 rlid;
__be32 imm_etype_pkey_eec;
__be32 byte_cnt;
__be32 wqe;
u8 opcode;
u8 is_send;
u8 reserved;
u8 owner;
};
struct mthca_err_cqe {
__be32 my_qpn;
u32 reserved1[3];
u8 syndrome;
u8 vendor_err;
__be16 db_cnt;
u32 reserved2;
__be32 wqe;
u8 opcode;
u8 reserved3[2];
u8 owner;
};
#define MTHCA_CQ_ENTRY_OWNER_SW (0 << 7)
#define MTHCA_CQ_ENTRY_OWNER_HW (1 << 7)
#define MTHCA_TAVOR_CQ_DB_INC_CI (1 << 24)
#define MTHCA_TAVOR_CQ_DB_REQ_NOT (2 << 24)
#define MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL (3 << 24)
#define MTHCA_TAVOR_CQ_DB_SET_CI (4 << 24)
#define MTHCA_TAVOR_CQ_DB_REQ_NOT_MULT (5 << 24)
#define MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL (1 << 24)
#define MTHCA_ARBEL_CQ_DB_REQ_NOT (2 << 24)
#define MTHCA_ARBEL_CQ_DB_REQ_NOT_MULT (3 << 24)
static inline struct mthca_cqe *get_cqe_from_buf(struct mthca_cq_buf *buf,
int entry)
{
if (buf->is_direct)
return buf->queue.direct.buf + (entry * MTHCA_CQ_ENTRY_SIZE);
else
return buf->queue.page_list[entry * MTHCA_CQ_ENTRY_SIZE / PAGE_SIZE].buf
+ (entry * MTHCA_CQ_ENTRY_SIZE) % PAGE_SIZE;
}
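/*
 * Worked example (illustrative, assuming 4 KiB pages): entry 300 lives
 * at byte offset 300 * 0x20 = 9600, i.e. in page_list[9600 / 4096] =
 * page 2, at offset 9600 % 4096 = 1408 bytes into that page.
 */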
static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry)
{
return get_cqe_from_buf(&cq->buf, entry);
}
static inline struct mthca_cqe *cqe_sw(struct mthca_cqe *cqe)
{
return MTHCA_CQ_ENTRY_OWNER_HW & cqe->owner ? NULL : cqe;
}
static inline struct mthca_cqe *next_cqe_sw(struct mthca_cq *cq)
{
return cqe_sw(get_cqe(cq, cq->cons_index & cq->ibcq.cqe));
}
static inline void set_cqe_hw(struct mthca_cqe *cqe)
{
cqe->owner = MTHCA_CQ_ENTRY_OWNER_HW;
}
static void dump_cqe(struct mthca_dev *dev, void *cqe_ptr)
{
__be32 *cqe = cqe_ptr;
(void) cqe; /* avoid warning if mthca_dbg compiled away... */
mthca_dbg(dev, "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
be32_to_cpu(cqe[0]), be32_to_cpu(cqe[1]), be32_to_cpu(cqe[2]),
be32_to_cpu(cqe[3]), be32_to_cpu(cqe[4]), be32_to_cpu(cqe[5]),
be32_to_cpu(cqe[6]), be32_to_cpu(cqe[7]));
}
/*
* incr is ignored in native Arbel (mem-free) mode, so cq->cons_index
* should be correct before calling update_cons_index().
*/
static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq,
int incr)
{
if (mthca_is_memfree(dev)) {
*cq->set_ci_db = cpu_to_be32(cq->cons_index);
wmb();
} else {
mthca_write64(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn, incr - 1,
dev->kar + MTHCA_CQ_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
/*
* Make sure doorbells don't leak out of CQ spinlock
* and reach the HCA out of order:
*/
mmiowb();
}
}
void mthca_cq_completion(struct mthca_dev *dev, u32 cqn)
{
struct mthca_cq *cq;
cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1));
if (!cq) {
mthca_warn(dev, "Completion event for bogus CQ %08x\n", cqn);
return;
}
++cq->arm_sn;
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
void mthca_cq_event(struct mthca_dev *dev, u32 cqn,
enum ib_event_type event_type)
{
struct mthca_cq *cq;
struct ib_event event;
spin_lock(&dev->cq_table.lock);
cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1));
if (cq)
++cq->refcount;
spin_unlock(&dev->cq_table.lock);
if (!cq) {
mthca_warn(dev, "Async event for bogus CQ %08x\n", cqn);
return;
}
event.device = &dev->ib_dev;
event.event = event_type;
event.element.cq = &cq->ibcq;
if (cq->ibcq.event_handler)
cq->ibcq.event_handler(&event, cq->ibcq.cq_context);
spin_lock(&dev->cq_table.lock);
if (!--cq->refcount)
wake_up(&cq->wait);
spin_unlock(&dev->cq_table.lock);
}
static inline int is_recv_cqe(struct mthca_cqe *cqe)
{
if ((cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) ==
MTHCA_ERROR_CQE_OPCODE_MASK)
return !(cqe->opcode & 0x01);
else
return !(cqe->is_send & 0x80);
}
void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
struct mthca_srq *srq)
{
struct mthca_cqe *cqe;
u32 prod_index;
int i, nfreed = 0;
spin_lock_irq(&cq->lock);
/*
* First we need to find the current producer index, so we
* know where to start cleaning from. It doesn't matter if HW
* adds new entries after this loop -- the QP we're worried
* about is already in RESET, so the new entries won't come
* from our QP and therefore don't need to be checked.
*/
for (prod_index = cq->cons_index;
cqe_sw(get_cqe(cq, prod_index & cq->ibcq.cqe));
++prod_index)
if (prod_index == cq->cons_index + cq->ibcq.cqe)
break;
if (0)
mthca_dbg(dev, "Cleaning QPN %06x from CQN %06x; ci %d, pi %d\n",
qpn, cq->cqn, cq->cons_index, prod_index);
/*
* Now sweep backwards through the CQ, removing CQ entries
* that match our QP by copying older entries on top of them.
*/
while ((int) --prod_index - (int) cq->cons_index >= 0) {
cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
if (cqe->my_qpn == cpu_to_be32(qpn)) {
if (srq && is_recv_cqe(cqe))
mthca_free_srq_wqe(srq, be32_to_cpu(cqe->wqe));
++nfreed;
} else if (nfreed)
memcpy(get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe),
cqe, MTHCA_CQ_ENTRY_SIZE);
}
if (nfreed) {
for (i = 0; i < nfreed; ++i)
set_cqe_hw(get_cqe(cq, (cq->cons_index + i) & cq->ibcq.cqe));
wmb();
cq->cons_index += nfreed;
update_cons_index(dev, cq, nfreed);
}
spin_unlock_irq(&cq->lock);
}
void mthca_cq_resize_copy_cqes(struct mthca_cq *cq)
{
int i;
/*
* In Tavor mode, the hardware keeps the consumer and producer
* indices mod the CQ size. Since we might be making the CQ
* bigger, we need to deal with the case where the producer
* index wrapped around before the CQ was resized.
*/
if (!mthca_is_memfree(to_mdev(cq->ibcq.device)) &&
cq->ibcq.cqe < cq->resize_buf->cqe) {
cq->cons_index &= cq->ibcq.cqe;
if (cqe_sw(get_cqe(cq, cq->ibcq.cqe)))
cq->cons_index -= cq->ibcq.cqe + 1;
}
for (i = cq->cons_index; cqe_sw(get_cqe(cq, i & cq->ibcq.cqe)); ++i)
memcpy(get_cqe_from_buf(&cq->resize_buf->buf,
i & cq->resize_buf->cqe),
get_cqe(cq, i & cq->ibcq.cqe), MTHCA_CQ_ENTRY_SIZE);
}
int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent)
{
int ret;
int i;
ret = mthca_buf_alloc(dev, nent * MTHCA_CQ_ENTRY_SIZE,
MTHCA_MAX_DIRECT_CQ_SIZE,
&buf->queue, &buf->is_direct,
&dev->driver_pd, 1, &buf->mr);
if (ret)
return ret;
for (i = 0; i < nent; ++i)
set_cqe_hw(get_cqe_from_buf(buf, i));
return 0;
}
void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe)
{
mthca_buf_free(dev, (cqe + 1) * MTHCA_CQ_ENTRY_SIZE, &buf->queue,
buf->is_direct, &buf->mr);
}
static void handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
struct mthca_qp *qp, int wqe_index, int is_send,
struct mthca_err_cqe *cqe,
struct ib_wc *entry, int *free_cqe)
{
int dbd;
__be32 new_wqe;
if (cqe->syndrome == SYNDROME_LOCAL_QP_OP_ERR) {
mthca_dbg(dev, "local QP operation err "
"(QPN %06x, WQE @ %08x, CQN %06x, index %d)\n",
be32_to_cpu(cqe->my_qpn), be32_to_cpu(cqe->wqe),
cq->cqn, cq->cons_index);
dump_cqe(dev, cqe);
}
/*
* For completions in error, only work request ID, status, vendor error
* (and freed resource count for RD) have to be set.
*/
switch (cqe->syndrome) {
case SYNDROME_LOCAL_LENGTH_ERR:
entry->status = IB_WC_LOC_LEN_ERR;
break;
case SYNDROME_LOCAL_QP_OP_ERR:
entry->status = IB_WC_LOC_QP_OP_ERR;
break;
case SYNDROME_LOCAL_EEC_OP_ERR:
entry->status = IB_WC_LOC_EEC_OP_ERR;
break;
case SYNDROME_LOCAL_PROT_ERR:
entry->status = IB_WC_LOC_PROT_ERR;
break;
case SYNDROME_WR_FLUSH_ERR:
entry->status = IB_WC_WR_FLUSH_ERR;
break;
case SYNDROME_MW_BIND_ERR:
entry->status = IB_WC_MW_BIND_ERR;
break;
case SYNDROME_BAD_RESP_ERR:
entry->status = IB_WC_BAD_RESP_ERR;
break;
case SYNDROME_LOCAL_ACCESS_ERR:
entry->status = IB_WC_LOC_ACCESS_ERR;
break;
case SYNDROME_REMOTE_INVAL_REQ_ERR:
entry->status = IB_WC_REM_INV_REQ_ERR;
break;
case SYNDROME_REMOTE_ACCESS_ERR:
entry->status = IB_WC_REM_ACCESS_ERR;
break;
case SYNDROME_REMOTE_OP_ERR:
entry->status = IB_WC_REM_OP_ERR;
break;
case SYNDROME_RETRY_EXC_ERR:
entry->status = IB_WC_RETRY_EXC_ERR;
break;
case SYNDROME_RNR_RETRY_EXC_ERR:
entry->status = IB_WC_RNR_RETRY_EXC_ERR;
break;
case SYNDROME_LOCAL_RDD_VIOL_ERR:
entry->status = IB_WC_LOC_RDD_VIOL_ERR;
break;
case SYNDROME_REMOTE_INVAL_RD_REQ_ERR:
entry->status = IB_WC_REM_INV_RD_REQ_ERR;
break;
case SYNDROME_REMOTE_ABORTED_ERR:
entry->status = IB_WC_REM_ABORT_ERR;
break;
case SYNDROME_INVAL_EECN_ERR:
entry->status = IB_WC_INV_EECN_ERR;
break;
case SYNDROME_INVAL_EEC_STATE_ERR:
entry->status = IB_WC_INV_EEC_STATE_ERR;
break;
default:
entry->status = IB_WC_GENERAL_ERR;
break;
}
entry->vendor_err = cqe->vendor_err;
/*
* Mem-free HCAs always generate one CQE per WQE, even in the
* error case, so we don't have to check the doorbell count, etc.
*/
if (mthca_is_memfree(dev))
return;
mthca_free_err_wqe(dev, qp, is_send, wqe_index, &dbd, &new_wqe);
/*
* If we're at the end of the WQE chain, or we've used up our
* doorbell count, free the CQE. Otherwise just update it for
* the next poll operation.
*/
if (!(new_wqe & cpu_to_be32(0x3f)) || (!cqe->db_cnt && dbd))
return;
be16_add_cpu(&cqe->db_cnt, -dbd);
cqe->wqe = new_wqe;
cqe->syndrome = SYNDROME_WR_FLUSH_ERR;
*free_cqe = 0;
}
static inline int mthca_poll_one(struct mthca_dev *dev,
struct mthca_cq *cq,
struct mthca_qp **cur_qp,
int *freed,
struct ib_wc *entry)
{
struct mthca_wq *wq;
struct mthca_cqe *cqe;
int wqe_index;
int is_error;
int is_send;
int free_cqe = 1;
int err = 0;
u16 checksum;
cqe = next_cqe_sw(cq);
if (!cqe)
return -EAGAIN;
/*
* Make sure we read CQ entry contents after we've checked the
* ownership bit.
*/
rmb();
if (0) {
mthca_dbg(dev, "%x/%d: CQE -> QPN %06x, WQE @ %08x\n",
cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn),
be32_to_cpu(cqe->wqe));
dump_cqe(dev, cqe);
}
is_error = (cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) ==
MTHCA_ERROR_CQE_OPCODE_MASK;
is_send = is_error ? cqe->opcode & 0x01 : cqe->is_send & 0x80;
if (!*cur_qp || be32_to_cpu(cqe->my_qpn) != (*cur_qp)->qpn) {
/*
* We do not have to take the QP table lock here,
* because CQs will be locked while QPs are removed
* from the table.
*/
*cur_qp = mthca_array_get(&dev->qp_table.qp,
be32_to_cpu(cqe->my_qpn) &
(dev->limits.num_qps - 1));
if (!*cur_qp) {
mthca_warn(dev, "CQ entry for unknown QP %06x\n",
be32_to_cpu(cqe->my_qpn) & 0xffffff);
err = -EINVAL;
goto out;
}
}
entry->qp = &(*cur_qp)->ibqp;
if (is_send) {
wq = &(*cur_qp)->sq;
wqe_index = ((be32_to_cpu(cqe->wqe) - (*cur_qp)->send_wqe_offset)
>> wq->wqe_shift);
entry->wr_id = (*cur_qp)->wrid[wqe_index];
} else if ((*cur_qp)->ibqp.srq) {
struct mthca_srq *srq = to_msrq((*cur_qp)->ibqp.srq);
u32 wqe = be32_to_cpu(cqe->wqe);
wq = NULL;
wqe_index = wqe >> srq->wqe_shift;
entry->wr_id = srq->wrid[wqe_index];
mthca_free_srq_wqe(srq, wqe);
} else {
s32 wqe;
wq = &(*cur_qp)->rq;
wqe = be32_to_cpu(cqe->wqe);
wqe_index = wqe >> wq->wqe_shift;
/*
* WQE addr == base - 1 might be reported in receive completion
* with error instead of (rq size - 1) by Sinai FW 1.0.800 and
* Arbel FW 5.1.400. This bug should be fixed in later FW revs.
*/
if (unlikely(wqe_index < 0))
wqe_index = wq->max - 1;
entry->wr_id = (*cur_qp)->wrid[wqe_index + (*cur_qp)->sq.max];
}
if (wq) {
if (wq->last_comp < wqe_index)
wq->tail += wqe_index - wq->last_comp;
else
wq->tail += wqe_index + wq->max - wq->last_comp;
wq->last_comp = wqe_index;
}
if (is_error) {
handle_error_cqe(dev, cq, *cur_qp, wqe_index, is_send,
(struct mthca_err_cqe *) cqe,
entry, &free_cqe);
goto out;
}
if (is_send) {
entry->wc_flags = 0;
switch (cqe->opcode) {
case MTHCA_OPCODE_RDMA_WRITE:
entry->opcode = IB_WC_RDMA_WRITE;
break;
case MTHCA_OPCODE_RDMA_WRITE_IMM:
entry->opcode = IB_WC_RDMA_WRITE;
entry->wc_flags |= IB_WC_WITH_IMM;
break;
case MTHCA_OPCODE_SEND:
entry->opcode = IB_WC_SEND;
break;
case MTHCA_OPCODE_SEND_IMM:
entry->opcode = IB_WC_SEND;
entry->wc_flags |= IB_WC_WITH_IMM;
break;
case MTHCA_OPCODE_RDMA_READ:
entry->opcode = IB_WC_RDMA_READ;
entry->byte_len = be32_to_cpu(cqe->byte_cnt);
break;
case MTHCA_OPCODE_ATOMIC_CS:
entry->opcode = IB_WC_COMP_SWAP;
entry->byte_len = MTHCA_ATOMIC_BYTE_LEN;
break;
case MTHCA_OPCODE_ATOMIC_FA:
entry->opcode = IB_WC_FETCH_ADD;
entry->byte_len = MTHCA_ATOMIC_BYTE_LEN;
break;
case MTHCA_OPCODE_BIND_MW:
entry->opcode = IB_WC_BIND_MW;
break;
default:
entry->opcode = MTHCA_OPCODE_INVALID;
break;
}
} else {
entry->byte_len = be32_to_cpu(cqe->byte_cnt);
switch (cqe->opcode & 0x1f) {
case IB_OPCODE_SEND_LAST_WITH_IMMEDIATE:
case IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE:
entry->wc_flags = IB_WC_WITH_IMM;
entry->ex.imm_data = cqe->imm_etype_pkey_eec;
entry->opcode = IB_WC_RECV;
break;
case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE:
case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE:
entry->wc_flags = IB_WC_WITH_IMM;
entry->ex.imm_data = cqe->imm_etype_pkey_eec;
entry->opcode = IB_WC_RECV_RDMA_WITH_IMM;
break;
default:
entry->wc_flags = 0;
entry->opcode = IB_WC_RECV;
break;
}
entry->slid = be16_to_cpu(cqe->rlid);
entry->sl = cqe->sl_ipok >> 4;
entry->src_qp = be32_to_cpu(cqe->rqpn) & 0xffffff;
entry->dlid_path_bits = cqe->g_mlpath & 0x7f;
entry->pkey_index = be32_to_cpu(cqe->imm_etype_pkey_eec) >> 16;
entry->wc_flags |= cqe->g_mlpath & 0x80 ? IB_WC_GRH : 0;
checksum = (be32_to_cpu(cqe->rqpn) >> 24) |
((be32_to_cpu(cqe->my_ee) >> 16) & 0xff00);
entry->csum_ok = (cqe->sl_ipok & 1 && checksum == 0xffff);
}
entry->status = IB_WC_SUCCESS;
out:
if (likely(free_cqe)) {
set_cqe_hw(cqe);
++(*freed);
++cq->cons_index;
}
return err;
}
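/*
 * Editorial sketch: mthca_poll_one() follows the usual ownership
 * handshake for DMA'd completion rings.  Condensed (these helpers are
 * the real ones defined earlier in this file):
 */
#if 0
cqe = next_cqe_sw(cq);	/* 1: test the owner bit                    */
if (!cqe)
	return -EAGAIN;	/*    hardware still owns the slot          */
rmb();			/* 2: order owner-bit read before payload   */
/* 3: ... read the CQE fields, fill in *entry ...                   */
set_cqe_hw(cqe);	/* 4: hand the slot back to the HCA         */
++cq->cons_index;	/* 5: doorbell is rung later, in batches    */
#endif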
int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
struct ib_wc *entry)
{
struct mthca_dev *dev = to_mdev(ibcq->device);
struct mthca_cq *cq = to_mcq(ibcq);
struct mthca_qp *qp = NULL;
unsigned long flags;
int err = 0;
int freed = 0;
int npolled;
spin_lock_irqsave(&cq->lock, flags);
npolled = 0;
repoll:
while (npolled < num_entries) {
err = mthca_poll_one(dev, cq, &qp,
&freed, entry + npolled);
if (err)
break;
++npolled;
}
if (freed) {
wmb();
update_cons_index(dev, cq, freed);
}
/*
* If a CQ resize is in progress and we discovered that the
* old buffer is empty, then peek in the new buffer, and if
* it's not empty, switch to the new buffer and continue
* polling there.
*/
if (unlikely(err == -EAGAIN && cq->resize_buf &&
cq->resize_buf->state == CQ_RESIZE_READY)) {
/*
* In Tavor mode, the hardware keeps the producer
* index modulo the CQ size. Since we might be making
* the CQ bigger, we need to mask our consumer index
* using the size of the old CQ buffer before looking
* in the new CQ buffer.
*/
if (!mthca_is_memfree(dev))
cq->cons_index &= cq->ibcq.cqe;
if (cqe_sw(get_cqe_from_buf(&cq->resize_buf->buf,
cq->cons_index & cq->resize_buf->cqe))) {
struct mthca_cq_buf tbuf;
int tcqe;
tbuf = cq->buf;
tcqe = cq->ibcq.cqe;
cq->buf = cq->resize_buf->buf;
cq->ibcq.cqe = cq->resize_buf->cqe;
cq->resize_buf->buf = tbuf;
cq->resize_buf->cqe = tcqe;
cq->resize_buf->state = CQ_RESIZE_SWAPPED;
goto repoll;
}
}
spin_unlock_irqrestore(&cq->lock, flags);
return err == 0 || err == -EAGAIN ? npolled : err;
}
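/*
 * Editorial usage sketch: a kernel consumer typically drains the CQ in
 * batches through the verbs layer, which dispatches to mthca_poll_cq()
 * via the poll_cq method.  handle_wc() is hypothetical:
 */
#if 0
struct ib_wc wc[16];
int i, n;

while ((n = ib_poll_cq(ibcq, ARRAY_SIZE(wc), wc)) > 0)
	for (i = 0; i < n; ++i)
		handle_wc(&wc[i]);
#endif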
int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags)
{
u32 dbhi = ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL :
MTHCA_TAVOR_CQ_DB_REQ_NOT) |
to_mcq(cq)->cqn;
mthca_write64(dbhi, 0xffffffff, to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&to_mdev(cq->device)->doorbell_lock));
return 0;
}
int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
struct mthca_cq *cq = to_mcq(ibcq);
__be32 db_rec[2];
u32 dbhi;
u32 sn = cq->arm_sn & 3;
db_rec[0] = cpu_to_be32(cq->cons_index);
db_rec[1] = cpu_to_be32((cq->cqn << 8) | (2 << 5) | (sn << 3) |
((flags & IB_CQ_SOLICITED_MASK) ==
IB_CQ_SOLICITED ? 1 : 2));
mthca_write_db_rec(db_rec, cq->arm_db);
/*
* Make sure that the doorbell record in host memory is
* written before ringing the doorbell via PCI MMIO.
*/
wmb();
dbhi = (sn << 28) |
((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL :
MTHCA_ARBEL_CQ_DB_REQ_NOT) | cq->cqn;
mthca_write64(dbhi, cq->cons_index,
to_mdev(ibcq->device)->kar + MTHCA_CQ_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->doorbell_lock));
return 0;
}
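/*
 * Editorial note: this is the "shadow doorbell" pattern -- the arm
 * state and consumer index are first stored in a doorbell record in
 * host memory that the HCA can DMA-read, and wmb() orders that store
 * before the MMIO write that actually kicks the hardware.  Tavor mode
 * (above) has no doorbell records and rings MMIO alone.
 */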
int mthca_init_cq(struct mthca_dev *dev, int nent,
struct mthca_ucontext *ctx, u32 pdn,
struct mthca_cq *cq)
{
struct mthca_mailbox *mailbox;
struct mthca_cq_context *cq_context;
int err = -ENOMEM;
u8 status;
cq->ibcq.cqe = nent - 1;
cq->is_kernel = !ctx;
cq->cqn = mthca_alloc(&dev->cq_table.alloc);
if (cq->cqn == -1)
return -ENOMEM;
if (mthca_is_memfree(dev)) {
err = mthca_table_get(dev, dev->cq_table.table, cq->cqn);
if (err)
goto err_out;
if (cq->is_kernel) {
cq->arm_sn = 1;
err = -ENOMEM;
cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI,
cq->cqn, &cq->set_ci_db);
if (cq->set_ci_db_index < 0)
goto err_out_icm;
cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM,
cq->cqn, &cq->arm_db);
if (cq->arm_db_index < 0)
goto err_out_ci;
}
}
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox)) {
err = PTR_ERR(mailbox);
goto err_out_arm;
}
cq_context = mailbox->buf;
if (cq->is_kernel) {
err = mthca_alloc_cq_buf(dev, &cq->buf, nent);
if (err)
goto err_out_mailbox;
}
spin_lock_init(&cq->lock);
cq->refcount = 1;
init_waitqueue_head(&cq->wait);
mutex_init(&cq->mutex);
memset(cq_context, 0, sizeof *cq_context);
cq_context->flags = cpu_to_be32(MTHCA_CQ_STATUS_OK |
MTHCA_CQ_STATE_DISARMED |
MTHCA_CQ_FLAG_TR);
cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24);
if (ctx)
cq_context->logsize_usrpage |= cpu_to_be32(ctx->uar.index);
else
cq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index);
cq_context->error_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
cq_context->comp_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn);
cq_context->pd = cpu_to_be32(pdn);
cq_context->lkey = cpu_to_be32(cq->buf.mr.ibmr.lkey);
cq_context->cqn = cpu_to_be32(cq->cqn);
if (mthca_is_memfree(dev)) {
cq_context->ci_db = cpu_to_be32(cq->set_ci_db_index);
cq_context->state_db = cpu_to_be32(cq->arm_db_index);
}
err = mthca_SW2HW_CQ(dev, mailbox, cq->cqn, &status);
if (err) {
mthca_warn(dev, "SW2HW_CQ failed (%d)\n", err);
goto err_out_free_mr;
}
if (status) {
mthca_warn(dev, "SW2HW_CQ returned status 0x%02x\n",
status);
err = -EINVAL;
goto err_out_free_mr;
}
spin_lock_irq(&dev->cq_table.lock);
err = mthca_array_set(&dev->cq_table.cq,
cq->cqn & (dev->limits.num_cqs - 1),
cq);
if (err) {
spin_unlock_irq(&dev->cq_table.lock);
goto err_out_free_mr;
}
spin_unlock_irq(&dev->cq_table.lock);
cq->cons_index = 0;
mthca_free_mailbox(dev, mailbox);
return 0;
err_out_free_mr:
if (cq->is_kernel)
mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
err_out_mailbox:
mthca_free_mailbox(dev, mailbox);
err_out_arm:
if (cq->is_kernel && mthca_is_memfree(dev))
mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
err_out_ci:
if (cq->is_kernel && mthca_is_memfree(dev))
mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
err_out_icm:
mthca_table_put(dev, dev->cq_table.table, cq->cqn);
err_out:
mthca_free(&dev->cq_table.alloc, cq->cqn);
return err;
}
static inline int get_cq_refcount(struct mthca_dev *dev, struct mthca_cq *cq)
{
int c;
spin_lock_irq(&dev->cq_table.lock);
c = cq->refcount;
spin_unlock_irq(&dev->cq_table.lock);
return c;
}
void mthca_free_cq(struct mthca_dev *dev,
struct mthca_cq *cq)
{
struct mthca_mailbox *mailbox;
int err;
u8 status;
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox)) {
mthca_warn(dev, "No memory for mailbox to free CQ.\n");
return;
}
err = mthca_HW2SW_CQ(dev, mailbox, cq->cqn, &status);
if (err)
mthca_warn(dev, "HW2SW_CQ failed (%d)\n", err);
else if (status)
mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n", status);
if (0) {
__be32 *ctx = mailbox->buf;
int j;
printk(KERN_ERR "context for CQN %x (cons index %x, next sw %d)\n",
cq->cqn, cq->cons_index,
cq->is_kernel ? !!next_cqe_sw(cq) : 0);
for (j = 0; j < 16; ++j)
printk(KERN_ERR "[%2x] %08x\n", j * 4, be32_to_cpu(ctx[j]));
}
spin_lock_irq(&dev->cq_table.lock);
mthca_array_clear(&dev->cq_table.cq,
cq->cqn & (dev->limits.num_cqs - 1));
--cq->refcount;
spin_unlock_irq(&dev->cq_table.lock);
if (dev->mthca_flags & MTHCA_FLAG_MSI_X)
synchronize_irq(dev->eq_table.eq[MTHCA_EQ_COMP].msi_x_vector);
else
synchronize_irq(dev->pdev->irq);
wait_event(cq->wait, !get_cq_refcount(dev, cq));
if (cq->is_kernel) {
mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
if (mthca_is_memfree(dev)) {
mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
}
}
mthca_table_put(dev, dev->cq_table.table, cq->cqn);
mthca_free(&dev->cq_table.alloc, cq->cqn);
mthca_free_mailbox(dev, mailbox);
}
int mthca_init_cq_table(struct mthca_dev *dev)
{
int err;
spin_lock_init(&dev->cq_table.lock);
err = mthca_alloc_init(&dev->cq_table.alloc,
dev->limits.num_cqs,
(1 << 24) - 1,
dev->limits.reserved_cqs);
if (err)
return err;
err = mthca_array_init(&dev->cq_table.cq,
dev->limits.num_cqs);
if (err)
mthca_alloc_cleanup(&dev->cq_table.alloc);
return err;
}
void mthca_cleanup_cq_table(struct mthca_dev *dev)
{
mthca_array_cleanup(&dev->cq_table.cq, dev->limits.num_cqs);
mthca_alloc_cleanup(&dev->cq_table.alloc);
}


@ -1,597 +0,0 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MTHCA_DEV_H
#define MTHCA_DEV_H
#include <linux/spinlock.h>
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/timer.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/semaphore.h>
#include <linux/wait.h>
#include "mthca_provider.h"
#include "mthca_doorbell.h"
#define DRV_NAME "ib_mthca"
#define PFX DRV_NAME ": "
#define DRV_VERSION "1.0-ofed1.5.2"
#define DRV_RELDATE "August 4, 2010"
enum {
MTHCA_FLAG_DDR_HIDDEN = 1 << 1,
MTHCA_FLAG_SRQ = 1 << 2,
MTHCA_FLAG_MSI_X = 1 << 3,
MTHCA_FLAG_NO_LAM = 1 << 4,
MTHCA_FLAG_FMR = 1 << 5,
MTHCA_FLAG_MEMFREE = 1 << 6,
MTHCA_FLAG_PCIE = 1 << 7,
MTHCA_FLAG_SINAI_OPT = 1 << 8
};
enum {
MTHCA_MAX_PORTS = 2
};
enum {
MTHCA_BOARD_ID_LEN = 64
};
enum {
MTHCA_EQ_CONTEXT_SIZE = 0x40,
MTHCA_CQ_CONTEXT_SIZE = 0x40,
MTHCA_QP_CONTEXT_SIZE = 0x200,
MTHCA_RDB_ENTRY_SIZE = 0x20,
MTHCA_AV_SIZE = 0x20,
MTHCA_MGM_ENTRY_SIZE = 0x100,
/* Arbel FW gives us these, but we need them for Tavor */
MTHCA_MPT_ENTRY_SIZE = 0x40,
MTHCA_MTT_SEG_SIZE = 0x40,
MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2)
};
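/*
 * Editorial note, worked out: MTHCA_MGM_ENTRY_SIZE is 0x100, i.e.
 * sixteen 16-byte chunks per multicast group entry.  The first two
 * chunks hold the group header (next-GID index plus the GID itself)
 * and each remaining chunk packs four 32-bit QPNs, so
 * MTHCA_QP_PER_MGM = 4 * (16 - 2) = 56 queue pairs per group entry.
 */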
enum {
MTHCA_EQ_CMD,
MTHCA_EQ_ASYNC,
MTHCA_EQ_COMP,
MTHCA_NUM_EQ
};
enum {
MTHCA_OPCODE_NOP = 0x00,
MTHCA_OPCODE_RDMA_WRITE = 0x08,
MTHCA_OPCODE_RDMA_WRITE_IMM = 0x09,
MTHCA_OPCODE_SEND = 0x0a,
MTHCA_OPCODE_SEND_IMM = 0x0b,
MTHCA_OPCODE_RDMA_READ = 0x10,
MTHCA_OPCODE_ATOMIC_CS = 0x11,
MTHCA_OPCODE_ATOMIC_FA = 0x12,
MTHCA_OPCODE_BIND_MW = 0x18,
MTHCA_OPCODE_INVALID = 0xff
};
enum {
MTHCA_CMD_USE_EVENTS = 1 << 0,
MTHCA_CMD_POST_DOORBELLS = 1 << 1
};
enum {
MTHCA_CMD_NUM_DBELL_DWORDS = 8
};
struct mthca_cmd {
struct pci_pool *pool;
struct mutex hcr_mutex;
struct semaphore poll_sem;
struct semaphore event_sem;
int max_cmds;
spinlock_t context_lock;
int free_head;
struct mthca_cmd_context *context;
u16 token_mask;
u32 flags;
void __iomem *dbell_map;
u16 dbell_offsets[MTHCA_CMD_NUM_DBELL_DWORDS];
};
struct mthca_limits {
int num_ports;
int vl_cap;
int mtu_cap;
int gid_table_len;
int pkey_table_len;
int local_ca_ack_delay;
int num_uars;
int max_sg;
int num_qps;
int max_wqes;
int max_desc_sz;
int max_qp_init_rdma;
int reserved_qps;
int num_srqs;
int max_srq_wqes;
int max_srq_sge;
int reserved_srqs;
int num_eecs;
int reserved_eecs;
int num_cqs;
int max_cqes;
int reserved_cqs;
int num_eqs;
int reserved_eqs;
int num_mpts;
int num_mtt_segs;
int mtt_seg_size;
int fmr_reserved_mtts;
int reserved_mtts;
int reserved_mrws;
int reserved_uars;
int num_mgms;
int num_amgms;
int reserved_mcgs;
int num_pds;
int reserved_pds;
u32 page_size_cap;
u32 flags;
u16 stat_rate_support;
u8 port_width_cap;
};
struct mthca_alloc {
u32 last;
u32 top;
u32 max;
u32 mask;
spinlock_t lock;
unsigned long *table;
};
struct mthca_array {
struct {
void **page;
int used;
} *page_list;
};
struct mthca_uar_table {
struct mthca_alloc alloc;
u64 uarc_base;
int uarc_size;
};
struct mthca_pd_table {
struct mthca_alloc alloc;
};
struct mthca_buddy {
unsigned long **bits;
int *num_free;
int max_order;
spinlock_t lock;
};
struct mthca_mr_table {
struct mthca_alloc mpt_alloc;
struct mthca_buddy mtt_buddy;
struct mthca_buddy *fmr_mtt_buddy;
u64 mtt_base;
u64 mpt_base;
struct mthca_icm_table *mtt_table;
struct mthca_icm_table *mpt_table;
struct {
void __iomem *mpt_base;
void __iomem *mtt_base;
struct mthca_buddy mtt_buddy;
} tavor_fmr;
};
struct mthca_eq_table {
struct mthca_alloc alloc;
void __iomem *clr_int;
u32 clr_mask;
u32 arm_mask;
struct mthca_eq eq[MTHCA_NUM_EQ];
u64 icm_virt;
struct page *icm_page;
dma_addr_t icm_dma;
int have_irq;
u8 inta_pin;
};
struct mthca_cq_table {
struct mthca_alloc alloc;
spinlock_t lock;
struct mthca_array cq;
struct mthca_icm_table *table;
};
struct mthca_srq_table {
struct mthca_alloc alloc;
spinlock_t lock;
struct mthca_array srq;
struct mthca_icm_table *table;
};
struct mthca_qp_table {
struct mthca_alloc alloc;
u32 rdb_base;
int rdb_shift;
int sqp_start;
spinlock_t lock;
struct mthca_array qp;
struct mthca_icm_table *qp_table;
struct mthca_icm_table *eqp_table;
struct mthca_icm_table *rdb_table;
};
struct mthca_av_table {
struct pci_pool *pool;
int num_ddr_avs;
u64 ddr_av_base;
void __iomem *av_map;
struct mthca_alloc alloc;
};
struct mthca_mcg_table {
struct mutex mutex;
struct mthca_alloc alloc;
struct mthca_icm_table *table;
};
struct mthca_catas_err {
u64 addr;
u32 __iomem *map;
u32 size;
struct timer_list timer;
struct list_head list;
};
extern struct mutex mthca_device_mutex;
struct mthca_dev {
struct ib_device ib_dev;
struct pci_dev *pdev;
int hca_type;
unsigned long mthca_flags;
unsigned long device_cap_flags;
u32 rev_id;
char board_id[MTHCA_BOARD_ID_LEN];
/* firmware info */
u64 fw_ver;
union {
struct {
u64 fw_start;
u64 fw_end;
} tavor;
struct {
u64 clr_int_base;
u64 eq_arm_base;
u64 eq_set_ci_base;
struct mthca_icm *fw_icm;
struct mthca_icm *aux_icm;
u16 fw_pages;
} arbel;
} fw;
u64 ddr_start;
u64 ddr_end;
MTHCA_DECLARE_DOORBELL_LOCK(doorbell_lock)
struct mutex cap_mask_mutex;
void __iomem *hcr;
void __iomem *kar;
void __iomem *clr_base;
union {
struct {
void __iomem *ecr_base;
} tavor;
struct {
void __iomem *eq_arm;
void __iomem *eq_set_ci_base;
} arbel;
} eq_regs;
struct mthca_cmd cmd;
struct mthca_limits limits;
struct mthca_uar_table uar_table;
struct mthca_pd_table pd_table;
struct mthca_mr_table mr_table;
struct mthca_eq_table eq_table;
struct mthca_cq_table cq_table;
struct mthca_srq_table srq_table;
struct mthca_qp_table qp_table;
struct mthca_av_table av_table;
struct mthca_mcg_table mcg_table;
struct mthca_catas_err catas_err;
struct mthca_uar driver_uar;
struct mthca_db_table *db_tab;
struct mthca_pd driver_pd;
struct mthca_mr driver_mr;
struct ib_mad_agent *send_agent[MTHCA_MAX_PORTS][2];
struct ib_ah *sm_ah[MTHCA_MAX_PORTS];
spinlock_t sm_lock;
u8 rate[MTHCA_MAX_PORTS];
int active;
};
#ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
extern int mthca_debug_level;
#define mthca_dbg(mdev, format, arg...) \
do { \
if (mthca_debug_level) \
dev_printk(KERN_DEBUG, &mdev->pdev->dev, format, ## arg); \
} while (0)
#else /* CONFIG_INFINIBAND_MTHCA_DEBUG */
#define mthca_dbg(mdev, format, arg...) do { (void) mdev; } while (0)
#endif /* CONFIG_INFINIBAND_MTHCA_DEBUG */
#define mthca_err(mdev, format, arg...) \
dev_err(&mdev->pdev->dev, format, ## arg)
#define mthca_info(mdev, format, arg...) \
dev_info(&mdev->pdev->dev, format, ## arg)
#define mthca_warn(mdev, format, arg...) \
dev_warn(&mdev->pdev->dev, format, ## arg)
extern void __buggy_use_of_MTHCA_GET(void);
extern void __buggy_use_of_MTHCA_PUT(void);
#define MTHCA_GET(dest, source, offset) \
do { \
void *__p = (char *) (source) + (offset); \
switch (sizeof (dest)) { \
case 1: (dest) = *(u8 *) __p; break; \
case 2: (dest) = be16_to_cpup(__p); break; \
case 4: (dest) = be32_to_cpup(__p); break; \
case 8: (dest) = be64_to_cpup(__p); break; \
default: __buggy_use_of_MTHCA_GET(); \
} \
} while (0)
#define MTHCA_PUT(dest, source, offset) \
do { \
void *__d = ((char *) (dest) + (offset)); \
switch (sizeof(source)) { \
case 1: *(u8 *) __d = (source); break; \
case 2: *(__be16 *) __d = cpu_to_be16(source); break; \
case 4: *(__be32 *) __d = cpu_to_be32(source); break; \
case 8: *(__be64 *) __d = cpu_to_be64(source); break; \
default: __buggy_use_of_MTHCA_PUT(); \
} \
} while (0)
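/*
 * Editorial usage sketch: sizeof(dest) selects the accessor, so one
 * macro reads 1-, 2-, 4- and 8-byte big-endian fields out of a
 * firmware mailbox.  The offsets below are hypothetical:
 */
#if 0
u8  ver;   MTHCA_GET(ver,   outbox, 0x00);	/* plain byte load */
u16 count; MTHCA_GET(count, outbox, 0x02);	/* be16_to_cpup()  */
u64 base;  MTHCA_GET(base,  outbox, 0x10);	/* be64_to_cpup()  */
#endif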
int mthca_reset(struct mthca_dev *mdev);
u32 mthca_alloc(struct mthca_alloc *alloc);
void mthca_free(struct mthca_alloc *alloc, u32 obj);
int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask,
u32 reserved);
void mthca_alloc_cleanup(struct mthca_alloc *alloc);
void *mthca_array_get(struct mthca_array *array, int index);
int mthca_array_set(struct mthca_array *array, int index, void *value);
void mthca_array_clear(struct mthca_array *array, int index);
int mthca_array_init(struct mthca_array *array, int nent);
void mthca_array_cleanup(struct mthca_array *array, int nent);
int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct,
union mthca_buf *buf, int *is_direct, struct mthca_pd *pd,
int hca_write, struct mthca_mr *mr);
void mthca_buf_free(struct mthca_dev *dev, int size, union mthca_buf *buf,
int is_direct, struct mthca_mr *mr);
int mthca_init_uar_table(struct mthca_dev *dev);
int mthca_init_pd_table(struct mthca_dev *dev);
int mthca_init_mr_table(struct mthca_dev *dev);
int mthca_init_eq_table(struct mthca_dev *dev);
int mthca_init_cq_table(struct mthca_dev *dev);
int mthca_init_srq_table(struct mthca_dev *dev);
int mthca_init_qp_table(struct mthca_dev *dev);
int mthca_init_av_table(struct mthca_dev *dev);
int mthca_init_mcg_table(struct mthca_dev *dev);
void mthca_cleanup_uar_table(struct mthca_dev *dev);
void mthca_cleanup_pd_table(struct mthca_dev *dev);
void mthca_cleanup_mr_table(struct mthca_dev *dev);
void mthca_cleanup_eq_table(struct mthca_dev *dev);
void mthca_cleanup_cq_table(struct mthca_dev *dev);
void mthca_cleanup_srq_table(struct mthca_dev *dev);
void mthca_cleanup_qp_table(struct mthca_dev *dev);
void mthca_cleanup_av_table(struct mthca_dev *dev);
void mthca_cleanup_mcg_table(struct mthca_dev *dev);
int mthca_register_device(struct mthca_dev *dev);
void mthca_unregister_device(struct mthca_dev *dev);
void mthca_start_catas_poll(struct mthca_dev *dev);
void mthca_stop_catas_poll(struct mthca_dev *dev);
int __mthca_restart_one(struct pci_dev *pdev);
int mthca_catas_init(void);
void mthca_catas_cleanup(void);
int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar);
void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd);
void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd);
int mthca_write_mtt_size(struct mthca_dev *dev);
struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size);
void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt);
int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
int start_index, u64 *buffer_list, int list_len);
int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
u64 iova, u64 total_size, u32 access, struct mthca_mr *mr);
int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
u32 access, struct mthca_mr *mr);
int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
u64 *buffer_list, int buffer_size_shift,
int list_len, u64 iova, u64 total_size,
u32 access, struct mthca_mr *mr);
void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr);
int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
u32 access, struct mthca_fmr *fmr);
int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
int list_len, u64 iova);
void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr);
int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
int list_len, u64 iova);
void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr);
int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr);
int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt);
void mthca_unmap_eq_icm(struct mthca_dev *dev);
int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
struct ib_wc *entry);
int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
int mthca_init_cq(struct mthca_dev *dev, int nent,
struct mthca_ucontext *ctx, u32 pdn,
struct mthca_cq *cq);
void mthca_free_cq(struct mthca_dev *dev,
struct mthca_cq *cq);
void mthca_cq_completion(struct mthca_dev *dev, u32 cqn);
void mthca_cq_event(struct mthca_dev *dev, u32 cqn,
enum ib_event_type event_type);
void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
struct mthca_srq *srq);
void mthca_cq_resize_copy_cqes(struct mthca_cq *cq);
int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent);
void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe);
int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
struct ib_srq_attr *attr, struct mthca_srq *srq);
void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq);
int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
int mthca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
int mthca_max_srq_sge(struct mthca_dev *dev);
void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
enum ib_event_type event_type);
void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr);
int mthca_tavor_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
int mthca_arbel_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
enum ib_event_type event_type);
int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr);
int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
struct ib_udata *udata);
int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr);
int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr);
int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
int index, int *dbd, __be32 *new_wqe);
int mthca_alloc_qp(struct mthca_dev *dev,
struct mthca_pd *pd,
struct mthca_cq *send_cq,
struct mthca_cq *recv_cq,
enum ib_qp_type type,
enum ib_sig_type send_policy,
struct ib_qp_cap *cap,
struct mthca_qp *qp);
int mthca_alloc_sqp(struct mthca_dev *dev,
struct mthca_pd *pd,
struct mthca_cq *send_cq,
struct mthca_cq *recv_cq,
enum ib_sig_type send_policy,
struct ib_qp_cap *cap,
int qpn,
int port,
struct mthca_sqp *sqp);
void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp);
int mthca_create_ah(struct mthca_dev *dev,
struct mthca_pd *pd,
struct ib_ah_attr *ah_attr,
struct mthca_ah *ah);
int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah);
int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
struct ib_ud_header *header);
int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr);
int mthca_ah_grh_present(struct mthca_ah *ah);
u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port);
enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port);
int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
int mthca_process_mad(struct ib_device *ibdev,
int mad_flags,
u8 port_num,
struct ib_wc *in_wc,
struct ib_grh *in_grh,
struct ib_mad *in_mad,
struct ib_mad *out_mad);
int mthca_create_agents(struct mthca_dev *dev);
void mthca_free_agents(struct mthca_dev *dev);
static inline struct mthca_dev *to_mdev(struct ib_device *ibdev)
{
return container_of(ibdev, struct mthca_dev, ib_dev);
}
static inline int mthca_is_memfree(struct mthca_dev *dev)
{
return dev->mthca_flags & MTHCA_FLAG_MEMFREE;
}
#endif /* MTHCA_DEV_H */


@ -1,109 +0,0 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/types.h>
#define MTHCA_RD_DOORBELL 0x00
#define MTHCA_SEND_DOORBELL 0x10
#define MTHCA_RECEIVE_DOORBELL 0x18
#define MTHCA_CQ_DOORBELL 0x20
#define MTHCA_EQ_DOORBELL 0x28
#if BITS_PER_LONG == 64
/*
* Assume that we can just write a 64-bit doorbell atomically. s390
* actually doesn't have writeq() but S/390 systems don't even have
* PCI so we won't worry about it.
*/
#define MTHCA_DECLARE_DOORBELL_LOCK(name)
#define MTHCA_INIT_DOORBELL_LOCK(ptr) do { } while (0)
#define MTHCA_GET_DOORBELL_LOCK(ptr) (NULL)
static inline void mthca_write64_raw(__be64 val, void __iomem *dest)
{
__raw_writeq((__force u64) val, dest);
}
static inline void mthca_write64(u32 hi, u32 lo, void __iomem *dest,
spinlock_t *doorbell_lock)
{
__raw_writeq((__force u64) cpu_to_be64((u64) hi << 32 | lo), dest);
}
static inline void mthca_write_db_rec(__be32 val[2], __be32 *db)
{
*(u64 *) db = *(u64 *) val;
}
#else
/*
* Just fall back to a spinlock to protect the doorbell if
* BITS_PER_LONG is 32 -- there's no portable way to do atomic 64-bit
* MMIO writes.
*/
#define MTHCA_DECLARE_DOORBELL_LOCK(name) spinlock_t name;
#define MTHCA_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr)
#define MTHCA_GET_DOORBELL_LOCK(ptr) (ptr)
static inline void mthca_write64_raw(__be64 val, void __iomem *dest)
{
__raw_writel(((__force u32 *) &val)[0], dest);
__raw_writel(((__force u32 *) &val)[1], dest + 4);
}
static inline void mthca_write64(u32 hi, u32 lo, void __iomem *dest,
spinlock_t *doorbell_lock)
{
unsigned long flags;
hi = (__force u32) cpu_to_be32(hi);
lo = (__force u32) cpu_to_be32(lo);
spin_lock_irqsave(doorbell_lock, flags);
__raw_writel(hi, dest);
__raw_writel(lo, dest + 4);
spin_unlock_irqrestore(doorbell_lock, flags);
}
static inline void mthca_write_db_rec(__be32 val[2], __be32 *db)
{
db[0] = val[0];
wmb();
db[1] = val[1];
}
#endif
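/*
 * Editorial usage sketch: callers hand the doorbell over as two 32-bit
 * words; on 64-bit hosts the pair is fused into a single atomic
 * big-endian store, on 32-bit hosts the spinlock keeps the two halves
 * from interleaving.  E.g. the Tavor CQ arm in mthca_cq.c:
 */
#if 0
mthca_write64(dbhi, 0xffffffff, dev->kar + MTHCA_CQ_DOORBELL,
	      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
#endif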


@ -1,920 +0,0 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
#include "mthca_config_reg.h"
enum {
MTHCA_NUM_ASYNC_EQE = 0x80,
MTHCA_NUM_CMD_EQE = 0x80,
MTHCA_NUM_SPARE_EQE = 0x80,
MTHCA_EQ_ENTRY_SIZE = 0x20
};
/*
* Must be packed because start is 64 bits but only aligned to 32 bits.
*/
struct mthca_eq_context {
__be32 flags;
__be64 start;
__be32 logsize_usrpage;
__be32 tavor_pd; /* reserved for Arbel */
u8 reserved1[3];
u8 intr;
__be32 arbel_pd; /* lost_count for Tavor */
__be32 lkey;
u32 reserved2[2];
__be32 consumer_index;
__be32 producer_index;
u32 reserved3[4];
} __attribute__((packed));
#define MTHCA_EQ_STATUS_OK ( 0 << 28)
#define MTHCA_EQ_STATUS_OVERFLOW ( 9 << 28)
#define MTHCA_EQ_STATUS_WRITE_FAIL (10 << 28)
#define MTHCA_EQ_OWNER_SW ( 0 << 24)
#define MTHCA_EQ_OWNER_HW ( 1 << 24)
#define MTHCA_EQ_FLAG_TR ( 1 << 18)
#define MTHCA_EQ_FLAG_OI ( 1 << 17)
#define MTHCA_EQ_STATE_ARMED ( 1 << 8)
#define MTHCA_EQ_STATE_FIRED ( 2 << 8)
#define MTHCA_EQ_STATE_ALWAYS_ARMED ( 3 << 8)
#define MTHCA_EQ_STATE_ARBEL ( 8 << 8)
enum {
MTHCA_EVENT_TYPE_COMP = 0x00,
MTHCA_EVENT_TYPE_PATH_MIG = 0x01,
MTHCA_EVENT_TYPE_COMM_EST = 0x02,
MTHCA_EVENT_TYPE_SQ_DRAINED = 0x03,
MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE = 0x13,
MTHCA_EVENT_TYPE_SRQ_LIMIT = 0x14,
MTHCA_EVENT_TYPE_CQ_ERROR = 0x04,
MTHCA_EVENT_TYPE_WQ_CATAS_ERROR = 0x05,
MTHCA_EVENT_TYPE_EEC_CATAS_ERROR = 0x06,
MTHCA_EVENT_TYPE_PATH_MIG_FAILED = 0x07,
MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10,
MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR = 0x11,
MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR = 0x12,
MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR = 0x08,
MTHCA_EVENT_TYPE_PORT_CHANGE = 0x09,
MTHCA_EVENT_TYPE_EQ_OVERFLOW = 0x0f,
MTHCA_EVENT_TYPE_ECC_DETECT = 0x0e,
MTHCA_EVENT_TYPE_CMD = 0x0a
};
#define MTHCA_ASYNC_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_PATH_MIG) | \
(1ULL << MTHCA_EVENT_TYPE_COMM_EST) | \
(1ULL << MTHCA_EVENT_TYPE_SQ_DRAINED) | \
(1ULL << MTHCA_EVENT_TYPE_CQ_ERROR) | \
(1ULL << MTHCA_EVENT_TYPE_WQ_CATAS_ERROR) | \
(1ULL << MTHCA_EVENT_TYPE_EEC_CATAS_ERROR) | \
(1ULL << MTHCA_EVENT_TYPE_PATH_MIG_FAILED) | \
(1ULL << MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
(1ULL << MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR) | \
(1ULL << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR) | \
(1ULL << MTHCA_EVENT_TYPE_PORT_CHANGE) | \
(1ULL << MTHCA_EVENT_TYPE_ECC_DETECT))
#define MTHCA_SRQ_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \
(1ULL << MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE) | \
(1ULL << MTHCA_EVENT_TYPE_SRQ_LIMIT))
#define MTHCA_CMD_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_CMD)
#define MTHCA_EQ_DB_INC_CI (1 << 24)
#define MTHCA_EQ_DB_REQ_NOT (2 << 24)
#define MTHCA_EQ_DB_DISARM_CQ (3 << 24)
#define MTHCA_EQ_DB_SET_CI (4 << 24)
#define MTHCA_EQ_DB_ALWAYS_ARM (5 << 24)
struct mthca_eqe {
u8 reserved1;
u8 type;
u8 reserved2;
u8 subtype;
union {
u32 raw[6];
struct {
__be32 cqn;
} __attribute__((packed)) comp;
struct {
u16 reserved1;
__be16 token;
u32 reserved2;
u8 reserved3[3];
u8 status;
__be64 out_param;
} __attribute__((packed)) cmd;
struct {
__be32 qpn;
} __attribute__((packed)) qp;
struct {
__be32 srqn;
} __attribute__((packed)) srq;
struct {
__be32 cqn;
u32 reserved1;
u8 reserved2[3];
u8 syndrome;
} __attribute__((packed)) cq_err;
struct {
u32 reserved1[2];
__be32 port;
} __attribute__((packed)) port_change;
} event;
u8 reserved3[3];
u8 owner;
} __attribute__((packed));
#define MTHCA_EQ_ENTRY_OWNER_SW (0 << 7)
#define MTHCA_EQ_ENTRY_OWNER_HW (1 << 7)
static inline u64 async_mask(struct mthca_dev *dev)
{
return dev->mthca_flags & MTHCA_FLAG_SRQ ?
MTHCA_ASYNC_EVENT_MASK | MTHCA_SRQ_EVENT_MASK :
MTHCA_ASYNC_EVENT_MASK;
}
static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
{
/*
* This barrier makes sure that all updates to ownership bits
* done by set_eqe_hw() hit memory before the consumer index
* is updated. set_eq_ci() allows the HCA to possibly write
* more EQ entries, and we want to avoid the exceedingly
* unlikely possibility of the HCA writing an entry and then
* having set_eqe_hw() overwrite the owner field.
*/
wmb();
mthca_write64(MTHCA_EQ_DB_SET_CI | eq->eqn, ci & (eq->nent - 1),
dev->kar + MTHCA_EQ_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}
static inline void arbel_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
{
/* See comment in tavor_set_eq_ci() above. */
wmb();
__raw_writel((__force u32) cpu_to_be32(ci),
dev->eq_regs.arbel.eq_set_ci_base + eq->eqn * 8);
/* We still want ordering, just not swabbing, so add a barrier */
mb();
}
static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
{
if (mthca_is_memfree(dev))
arbel_set_eq_ci(dev, eq, ci);
else
tavor_set_eq_ci(dev, eq, ci);
}
static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn)
{
mthca_write64(MTHCA_EQ_DB_REQ_NOT | eqn, 0,
dev->kar + MTHCA_EQ_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}
static inline void arbel_eq_req_not(struct mthca_dev *dev, u32 eqn_mask)
{
writel(eqn_mask, dev->eq_regs.arbel.eq_arm);
}
static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn)
{
if (!mthca_is_memfree(dev)) {
mthca_write64(MTHCA_EQ_DB_DISARM_CQ | eqn, cqn,
dev->kar + MTHCA_EQ_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}
}
static inline struct mthca_eqe *get_eqe(struct mthca_eq *eq, u32 entry)
{
unsigned long off = (entry & (eq->nent - 1)) * MTHCA_EQ_ENTRY_SIZE;
return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE;
}
static inline struct mthca_eqe *next_eqe_sw(struct mthca_eq *eq)
{
struct mthca_eqe *eqe;
eqe = get_eqe(eq, eq->cons_index);
return (MTHCA_EQ_ENTRY_OWNER_HW & eqe->owner) ? NULL : eqe;
}
static inline void set_eqe_hw(struct mthca_eqe *eqe)
{
eqe->owner = MTHCA_EQ_ENTRY_OWNER_HW;
}
static void port_change(struct mthca_dev *dev, int port, int active)
{
struct ib_event record;
mthca_dbg(dev, "Port change to %s for port %d\n",
active ? "active" : "down", port);
record.device = &dev->ib_dev;
record.event = active ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
record.element.port_num = port;
ib_dispatch_event(&record);
}
static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
{
struct mthca_eqe *eqe;
int disarm_cqn;
int eqes_found = 0;
int set_ci = 0;
while ((eqe = next_eqe_sw(eq))) {
/*
* Make sure we read EQ entry contents after we've
* checked the ownership bit.
*/
rmb();
switch (eqe->type) {
case MTHCA_EVENT_TYPE_COMP:
disarm_cqn = be32_to_cpu(eqe->event.comp.cqn) & 0xffffff;
disarm_cq(dev, eq->eqn, disarm_cqn);
mthca_cq_completion(dev, disarm_cqn);
break;
case MTHCA_EVENT_TYPE_PATH_MIG:
mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
IB_EVENT_PATH_MIG);
break;
case MTHCA_EVENT_TYPE_COMM_EST:
mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
IB_EVENT_COMM_EST);
break;
case MTHCA_EVENT_TYPE_SQ_DRAINED:
mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
IB_EVENT_SQ_DRAINED);
break;
case MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE:
mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
IB_EVENT_QP_LAST_WQE_REACHED);
break;
case MTHCA_EVENT_TYPE_SRQ_LIMIT:
mthca_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) & 0xffffff,
IB_EVENT_SRQ_LIMIT_REACHED);
break;
case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR:
mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
IB_EVENT_QP_FATAL);
break;
case MTHCA_EVENT_TYPE_PATH_MIG_FAILED:
mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
IB_EVENT_PATH_MIG_ERR);
break;
case MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
IB_EVENT_QP_REQ_ERR);
break;
case MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR:
mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
IB_EVENT_QP_ACCESS_ERR);
break;
case MTHCA_EVENT_TYPE_CMD:
mthca_cmd_event(dev,
be16_to_cpu(eqe->event.cmd.token),
eqe->event.cmd.status,
be64_to_cpu(eqe->event.cmd.out_param));
break;
case MTHCA_EVENT_TYPE_PORT_CHANGE:
port_change(dev,
(be32_to_cpu(eqe->event.port_change.port) >> 28) & 3,
eqe->subtype == 0x4);
break;
case MTHCA_EVENT_TYPE_CQ_ERROR:
mthca_warn(dev, "CQ %s on CQN %06x\n",
eqe->event.cq_err.syndrome == 1 ?
"overrun" : "access violation",
be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff);
mthca_cq_event(dev, be32_to_cpu(eqe->event.cq_err.cqn),
IB_EVENT_CQ_ERR);
break;
case MTHCA_EVENT_TYPE_EQ_OVERFLOW:
mthca_warn(dev, "EQ overrun on EQN %d\n", eq->eqn);
break;
case MTHCA_EVENT_TYPE_EEC_CATAS_ERROR:
case MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR:
case MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR:
case MTHCA_EVENT_TYPE_ECC_DETECT:
default:
mthca_warn(dev, "Unhandled event %02x(%02x) on EQ %d\n",
eqe->type, eqe->subtype, eq->eqn);
break;
}
set_eqe_hw(eqe);
++eq->cons_index;
eqes_found = 1;
++set_ci;
/*
* The HCA will think the queue has overflowed if we
* don't tell it we've been processing events. We
* create our EQs with MTHCA_NUM_SPARE_EQE extra
* entries, so we must update our consumer index at
* least that often.
*/
if (unlikely(set_ci >= MTHCA_NUM_SPARE_EQE)) {
/*
* Conditional on hca_type is OK here because
* this is a rare case, not the fast path.
*/
set_eq_ci(dev, eq, eq->cons_index);
set_ci = 0;
}
}
/*
* Rely on caller to set consumer index so that we don't have
* to test hca_type in our interrupt handling fast path.
*/
return eqes_found;
}
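/*
 * Editorial note: the EQs are created with MTHCA_NUM_SPARE_EQE (0x80)
 * entries of slack beyond the worst-case event count (see
 * mthca_init_eq_table() below), so updating the consumer index at
 * least once every 0x80 processed entries, as above, guarantees the
 * HCA always finds a free slot and never declares an overrun mid-loop.
 */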
static irqreturn_t mthca_tavor_interrupt(int irq, void *dev_ptr)
{
struct mthca_dev *dev = dev_ptr;
u32 ecr;
int i;
if (dev->eq_table.clr_mask)
writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);
ecr = readl(dev->eq_regs.tavor.ecr_base + 4);
if (!ecr)
return IRQ_NONE;
writel(ecr, dev->eq_regs.tavor.ecr_base +
MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4);
for (i = 0; i < MTHCA_NUM_EQ; ++i)
if (ecr & dev->eq_table.eq[i].eqn_mask) {
if (mthca_eq_int(dev, &dev->eq_table.eq[i]))
tavor_set_eq_ci(dev, &dev->eq_table.eq[i],
dev->eq_table.eq[i].cons_index);
tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
}
return IRQ_HANDLED;
}
static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr)
{
struct mthca_eq *eq = eq_ptr;
struct mthca_dev *dev = eq->dev;
mthca_eq_int(dev, eq);
tavor_set_eq_ci(dev, eq, eq->cons_index);
tavor_eq_req_not(dev, eq->eqn);
/* MSI-X vectors always belong to us */
return IRQ_HANDLED;
}
static irqreturn_t mthca_arbel_interrupt(int irq, void *dev_ptr)
{
struct mthca_dev *dev = dev_ptr;
int work = 0;
int i;
if (dev->eq_table.clr_mask)
writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);
for (i = 0; i < MTHCA_NUM_EQ; ++i)
if (mthca_eq_int(dev, &dev->eq_table.eq[i])) {
work = 1;
arbel_set_eq_ci(dev, &dev->eq_table.eq[i],
dev->eq_table.eq[i].cons_index);
}
arbel_eq_req_not(dev, dev->eq_table.arm_mask);
return IRQ_RETVAL(work);
}
static irqreturn_t mthca_arbel_msi_x_interrupt(int irq, void *eq_ptr)
{
struct mthca_eq *eq = eq_ptr;
struct mthca_dev *dev = eq->dev;
mthca_eq_int(dev, eq);
arbel_set_eq_ci(dev, eq, eq->cons_index);
arbel_eq_req_not(dev, eq->eqn_mask);
/* MSI-X vectors always belong to us */
return IRQ_HANDLED;
}
static int mthca_create_eq(struct mthca_dev *dev,
int nent,
u8 intr,
struct mthca_eq *eq)
{
int npages;
u64 *dma_list = NULL;
dma_addr_t t;
struct mthca_mailbox *mailbox;
struct mthca_eq_context *eq_context;
int err = -ENOMEM;
int i;
u8 status;
eq->dev = dev;
eq->nent = roundup_pow_of_two(max(nent, 2));
npages = ALIGN(eq->nent * MTHCA_EQ_ENTRY_SIZE, PAGE_SIZE) / PAGE_SIZE;
eq->page_list = kmalloc(npages * sizeof *eq->page_list,
GFP_KERNEL);
if (!eq->page_list)
goto err_out;
for (i = 0; i < npages; ++i)
eq->page_list[i].buf = NULL;
dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
if (!dma_list)
goto err_out_free;
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox))
goto err_out_free;
eq_context = mailbox->buf;
for (i = 0; i < npages; ++i) {
eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev,
PAGE_SIZE, &t, GFP_KERNEL);
if (!eq->page_list[i].buf)
goto err_out_free_pages;
dma_list[i] = t;
pci_unmap_addr_set(&eq->page_list[i], mapping, t);
clear_page(eq->page_list[i].buf);
}
for (i = 0; i < eq->nent; ++i)
set_eqe_hw(get_eqe(eq, i));
eq->eqn = mthca_alloc(&dev->eq_table.alloc);
if (eq->eqn == -1)
goto err_out_free_pages;
err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num,
dma_list, PAGE_SHIFT, npages,
0, npages * PAGE_SIZE,
MTHCA_MPT_FLAG_LOCAL_WRITE |
MTHCA_MPT_FLAG_LOCAL_READ,
&eq->mr);
if (err)
goto err_out_free_eq;
memset(eq_context, 0, sizeof *eq_context);
eq_context->flags = cpu_to_be32(MTHCA_EQ_STATUS_OK |
MTHCA_EQ_OWNER_HW |
MTHCA_EQ_STATE_ARMED |
MTHCA_EQ_FLAG_TR);
if (mthca_is_memfree(dev))
eq_context->flags |= cpu_to_be32(MTHCA_EQ_STATE_ARBEL);
eq_context->logsize_usrpage = cpu_to_be32((ffs(eq->nent) - 1) << 24);
if (mthca_is_memfree(dev)) {
eq_context->arbel_pd = cpu_to_be32(dev->driver_pd.pd_num);
} else {
eq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index);
eq_context->tavor_pd = cpu_to_be32(dev->driver_pd.pd_num);
}
eq_context->intr = intr;
eq_context->lkey = cpu_to_be32(eq->mr.ibmr.lkey);
err = mthca_SW2HW_EQ(dev, mailbox, eq->eqn, &status);
if (err) {
mthca_warn(dev, "SW2HW_EQ failed (%d)\n", err);
goto err_out_free_mr;
}
if (status) {
mthca_warn(dev, "SW2HW_EQ returned status 0x%02x\n",
status);
err = -EINVAL;
goto err_out_free_mr;
}
kfree(dma_list);
mthca_free_mailbox(dev, mailbox);
eq->eqn_mask = swab32(1 << eq->eqn);
eq->cons_index = 0;
dev->eq_table.arm_mask |= eq->eqn_mask;
mthca_dbg(dev, "Allocated EQ %d with %d entries\n",
eq->eqn, eq->nent);
return err;
err_out_free_mr:
mthca_free_mr(dev, &eq->mr);
err_out_free_eq:
mthca_free(&dev->eq_table.alloc, eq->eqn);
err_out_free_pages:
for (i = 0; i < npages; ++i)
if (eq->page_list[i].buf)
dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
eq->page_list[i].buf,
pci_unmap_addr(&eq->page_list[i],
mapping));
mthca_free_mailbox(dev, mailbox);
err_out_free:
kfree(eq->page_list);
kfree(dma_list);
err_out:
return err;
}
static void mthca_free_eq(struct mthca_dev *dev,
struct mthca_eq *eq)
{
struct mthca_mailbox *mailbox;
int err;
u8 status;
int npages = (eq->nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) /
PAGE_SIZE;
int i;
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox))
return;
err = mthca_HW2SW_EQ(dev, mailbox, eq->eqn, &status);
if (err)
mthca_warn(dev, "HW2SW_EQ failed (%d)\n", err);
if (status)
mthca_warn(dev, "HW2SW_EQ returned status 0x%02x\n", status);
dev->eq_table.arm_mask &= ~eq->eqn_mask;
if (0) {
mthca_dbg(dev, "Dumping EQ context %02x:\n", eq->eqn);
for (i = 0; i < sizeof (struct mthca_eq_context) / 4; ++i) {
if (i % 4 == 0)
printk("[%02x] ", i * 4);
printk(" %08x", be32_to_cpup(mailbox->buf + i * 4));
if ((i + 1) % 4 == 0)
printk("\n");
}
}
mthca_free_mr(dev, &eq->mr);
for (i = 0; i < npages; ++i)
pci_free_consistent(dev->pdev, PAGE_SIZE,
eq->page_list[i].buf,
pci_unmap_addr(&eq->page_list[i], mapping));
kfree(eq->page_list);
mthca_free_mailbox(dev, mailbox);
}
static void mthca_free_irqs(struct mthca_dev *dev)
{
int i;
if (dev->eq_table.have_irq)
free_irq(dev->pdev->irq, dev);
for (i = 0; i < MTHCA_NUM_EQ; ++i)
if (dev->eq_table.eq[i].have_irq) {
free_irq(dev->eq_table.eq[i].msi_x_vector,
dev->eq_table.eq + i);
dev->eq_table.eq[i].have_irq = 0;
}
}
static int mthca_map_reg(struct mthca_dev *dev,
unsigned long offset, unsigned long size,
void __iomem **map)
{
unsigned long base = pci_resource_start(dev->pdev, 0);
*map = ioremap(base + offset, size);
if (!*map)
return -ENOMEM;
return 0;
}
static int mthca_map_eq_regs(struct mthca_dev *dev)
{
if (mthca_is_memfree(dev)) {
/*
* We assume that the EQ arm and EQ set CI registers
* fall within the first BAR. We can't trust the
* values firmware gives us, since those addresses are
* valid on the HCA's side of the PCI bus but not
* necessarily the host side.
*/
if (mthca_map_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
&dev->clr_base)) {
mthca_err(dev, "Couldn't map interrupt clear register, "
"aborting.\n");
return -ENOMEM;
}
/*
* Add 4 because we limit ourselves to EQs 0 ... 31,
* so we only need the low word of the register.
*/
if (mthca_map_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
dev->fw.arbel.eq_arm_base) + 4, 4,
&dev->eq_regs.arbel.eq_arm)) {
mthca_err(dev, "Couldn't map EQ arm register, aborting.\n");
iounmap(dev->clr_base);
return -ENOMEM;
}
if (mthca_map_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
dev->fw.arbel.eq_set_ci_base,
MTHCA_EQ_SET_CI_SIZE,
&dev->eq_regs.arbel.eq_set_ci_base)) {
mthca_err(dev, "Couldn't map EQ CI register, aborting.\n");
iounmap(dev->eq_regs.arbel.eq_arm);
iounmap(dev->clr_base);
return -ENOMEM;
}
} else {
if (mthca_map_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE,
&dev->clr_base)) {
mthca_err(dev, "Couldn't map interrupt clear register, "
"aborting.\n");
return -ENOMEM;
}
if (mthca_map_reg(dev, MTHCA_ECR_BASE,
MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE,
&dev->eq_regs.tavor.ecr_base)) {
mthca_err(dev, "Couldn't map ecr register, "
"aborting.\n");
iounmap(dev->clr_base);
return -ENOMEM;
}
}
return 0;
}
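/*
 * Editorial note: "(pci_resource_len(...) - 1) & addr" keeps only the
 * offset-within-BAR bits of the firmware-reported address; PCI BAR
 * sizes are powers of two, so len - 1 is a mask.  With a 1 MB BAR,
 * for example, a device-side address of 0xf0080010 becomes host
 * offset 0x80010.
 */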
static void mthca_unmap_eq_regs(struct mthca_dev *dev)
{
if (mthca_is_memfree(dev)) {
iounmap(dev->eq_regs.arbel.eq_set_ci_base);
iounmap(dev->eq_regs.arbel.eq_arm);
iounmap(dev->clr_base);
} else {
iounmap(dev->eq_regs.tavor.ecr_base);
iounmap(dev->clr_base);
}
}
int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
{
int ret;
u8 status;
/*
* We assume that mapping one page is enough for the whole EQ
* context table. This is fine with all current HCAs, because
* we only use 32 EQs and each EQ context takes
* MTHCA_EQ_CONTEXT_SIZE (64) bytes, or 2 KB total -- still well
* under one page.
*/
dev->eq_table.icm_virt = icm_virt;
dev->eq_table.icm_page = alloc_page(GFP_HIGHUSER);
if (!dev->eq_table.icm_page)
return -ENOMEM;
dev->eq_table.icm_dma = pci_map_page(dev->pdev, dev->eq_table.icm_page, 0,
PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
if (pci_dma_mapping_error(dev->pdev, dev->eq_table.icm_dma)) {
__free_page(dev->eq_table.icm_page);
return -ENOMEM;
}
ret = mthca_MAP_ICM_page(dev, dev->eq_table.icm_dma, icm_virt, &status);
if (!ret && status)
ret = -EINVAL;
if (ret) {
pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
PCI_DMA_BIDIRECTIONAL);
__free_page(dev->eq_table.icm_page);
}
return ret;
}
void mthca_unmap_eq_icm(struct mthca_dev *dev)
{
u8 status;
mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, 1, &status);
pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
PCI_DMA_BIDIRECTIONAL);
__free_page(dev->eq_table.icm_page);
}
int mthca_init_eq_table(struct mthca_dev *dev)
{
int err;
u8 status;
u8 intr;
int i;
err = mthca_alloc_init(&dev->eq_table.alloc,
dev->limits.num_eqs,
dev->limits.num_eqs - 1,
dev->limits.reserved_eqs);
if (err)
return err;
err = mthca_map_eq_regs(dev);
if (err)
goto err_out_free;
if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
dev->eq_table.clr_mask = 0;
} else {
dev->eq_table.clr_mask =
swab32(1 << (dev->eq_table.inta_pin & 31));
dev->eq_table.clr_int = dev->clr_base +
(dev->eq_table.inta_pin < 32 ? 4 : 0);
}
dev->eq_table.arm_mask = 0;
intr = dev->eq_table.inta_pin;
err = mthca_create_eq(dev, dev->limits.num_cqs + MTHCA_NUM_SPARE_EQE,
(dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 128 : intr,
&dev->eq_table.eq[MTHCA_EQ_COMP]);
if (err)
goto err_out_unmap;
err = mthca_create_eq(dev, MTHCA_NUM_ASYNC_EQE + MTHCA_NUM_SPARE_EQE,
(dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 129 : intr,
&dev->eq_table.eq[MTHCA_EQ_ASYNC]);
if (err)
goto err_out_comp;
err = mthca_create_eq(dev, MTHCA_NUM_CMD_EQE + MTHCA_NUM_SPARE_EQE,
(dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 130 : intr,
&dev->eq_table.eq[MTHCA_EQ_CMD]);
if (err)
goto err_out_async;
if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
static const char *eq_name[] = {
[MTHCA_EQ_COMP] = DRV_NAME " (comp)",
[MTHCA_EQ_ASYNC] = DRV_NAME " (async)",
[MTHCA_EQ_CMD] = DRV_NAME " (cmd)"
};
for (i = 0; i < MTHCA_NUM_EQ; ++i) {
err = request_irq(dev->eq_table.eq[i].msi_x_vector,
mthca_is_memfree(dev) ?
mthca_arbel_msi_x_interrupt :
mthca_tavor_msi_x_interrupt,
0, eq_name[i], dev->eq_table.eq + i);
if (err)
goto err_out_cmd;
dev->eq_table.eq[i].have_irq = 1;
}
} else {
err = request_irq(dev->pdev->irq,
mthca_is_memfree(dev) ?
mthca_arbel_interrupt :
mthca_tavor_interrupt,
IRQF_SHARED, DRV_NAME, dev);
if (err)
goto err_out_cmd;
dev->eq_table.have_irq = 1;
}
err = mthca_MAP_EQ(dev, async_mask(dev),
0, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status);
if (err)
mthca_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, err);
if (status)
mthca_warn(dev, "MAP_EQ for async EQ %d returned status 0x%02x\n",
dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, status);
err = mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK,
0, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status);
if (err)
mthca_warn(dev, "MAP_EQ for cmd EQ %d failed (%d)\n",
dev->eq_table.eq[MTHCA_EQ_CMD].eqn, err);
if (status)
mthca_warn(dev, "MAP_EQ for cmd EQ %d returned status 0x%02x\n",
dev->eq_table.eq[MTHCA_EQ_CMD].eqn, status);
for (i = 0; i < MTHCA_NUM_EQ; ++i)
if (mthca_is_memfree(dev))
arbel_eq_req_not(dev, dev->eq_table.eq[i].eqn_mask);
else
tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
return 0;
err_out_cmd:
mthca_free_irqs(dev);
mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_CMD]);
err_out_async:
mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_ASYNC]);
err_out_comp:
mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_COMP]);
err_out_unmap:
mthca_unmap_eq_regs(dev);
err_out_free:
mthca_alloc_cleanup(&dev->eq_table.alloc);
return err;
}
void mthca_cleanup_eq_table(struct mthca_dev *dev)
{
u8 status;
int i;
mthca_free_irqs(dev);
mthca_MAP_EQ(dev, async_mask(dev),
1, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status);
mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK,
1, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status);
for (i = 0; i < MTHCA_NUM_EQ; ++i)
mthca_free_eq(dev, &dev->eq_table.eq[i]);
mthca_unmap_eq_regs(dev);
mthca_alloc_cleanup(&dev->eq_table.alloc);
}


@ -1,346 +0,0 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/string.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_smi.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
enum {
MTHCA_VENDOR_CLASS1 = 0x9,
MTHCA_VENDOR_CLASS2 = 0xa
};
static int mthca_update_rate(struct mthca_dev *dev, u8 port_num)
{
struct ib_port_attr *tprops = NULL;
int ret;
tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
if (!tprops)
return -ENOMEM;
ret = ib_query_port(&dev->ib_dev, port_num, tprops);
if (ret) {
printk(KERN_WARNING "ib_query_port failed (%d) for %s port %d\n",
ret, dev->ib_dev.name, port_num);
goto out;
}
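/*
* Cache the port rate as active_speed times link width; e.g. a 4X
* DDR link gives 2 * 4 = 8 (in units of one 2.5 Gb/s 1X SDR lane).
*/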
dev->rate[port_num - 1] = tprops->active_speed *
ib_width_enum_to_int(tprops->active_width);
out:
kfree(tprops);
return ret;
}
static void update_sm_ah(struct mthca_dev *dev,
u8 port_num, u16 lid, u8 sl)
{
struct ib_ah *new_ah;
struct ib_ah_attr ah_attr;
unsigned long flags;
if (!dev->send_agent[port_num - 1][0])
return;
memset(&ah_attr, 0, sizeof ah_attr);
ah_attr.dlid = lid;
ah_attr.sl = sl;
ah_attr.port_num = port_num;
new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
&ah_attr);
if (IS_ERR(new_ah))
return;
spin_lock_irqsave(&dev->sm_lock, flags);
if (dev->sm_ah[port_num - 1])
ib_destroy_ah(dev->sm_ah[port_num - 1]);
dev->sm_ah[port_num - 1] = new_ah;
spin_unlock_irqrestore(&dev->sm_lock, flags);
}
/*
* Snoop SM MADs for port info and P_Key table sets, so we can
* synthesize LID change and P_Key change events.
*/
static void smp_snoop(struct ib_device *ibdev,
u8 port_num,
struct ib_mad *mad,
u16 prev_lid)
{
struct ib_event event;
if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
mad->mad_hdr.method == IB_MGMT_METHOD_SET) {
if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) {
struct ib_port_info *pinfo =
(struct ib_port_info *) ((struct ib_smp *) mad)->data;
u16 lid = be16_to_cpu(pinfo->lid);
mthca_update_rate(to_mdev(ibdev), port_num);
update_sm_ah(to_mdev(ibdev), port_num,
be16_to_cpu(pinfo->sm_lid),
pinfo->neighbormtu_mastersmsl & 0xf);
event.device = ibdev;
event.element.port_num = port_num;
if (pinfo->clientrereg_resv_subnetto & 0x80) {
event.event = IB_EVENT_CLIENT_REREGISTER;
ib_dispatch_event(&event);
}
if (prev_lid != lid) {
event.event = IB_EVENT_LID_CHANGE;
ib_dispatch_event(&event);
}
}
if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) {
event.device = ibdev;
event.event = IB_EVENT_PKEY_CHANGE;
event.element.port_num = port_num;
ib_dispatch_event(&event);
}
}
}
static void node_desc_override(struct ib_device *dev,
struct ib_mad *mad)
{
if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP &&
mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) {
mutex_lock(&to_mdev(dev)->cap_mask_mutex);
memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64);
mutex_unlock(&to_mdev(dev)->cap_mask_mutex);
}
}
static void forward_trap(struct mthca_dev *dev,
u8 port_num,
struct ib_mad *mad)
{
int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED;
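/* qpn selects the agent: 0 (SMI/QP0) for LID-routed SM MADs, 1 (GSI/QP1) otherwise. */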
struct ib_mad_send_buf *send_buf;
struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn];
int ret;
unsigned long flags;
if (agent) {
send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
IB_MGMT_MAD_DATA, GFP_ATOMIC);
/*
* We rely here on the fact that MLX QPs don't use the
* address handle after the send is posted (strictly
* speaking this violates the IB spec, but we know
* it's OK for our devices).
*/
spin_lock_irqsave(&dev->sm_lock, flags);
memcpy(send_buf->mad, mad, sizeof *mad);
if ((send_buf->ah = dev->sm_ah[port_num - 1]))
ret = ib_post_send_mad(send_buf, NULL);
else
ret = -EINVAL;
spin_unlock_irqrestore(&dev->sm_lock, flags);
if (ret)
ib_free_send_mad(send_buf);
}
}
int mthca_process_mad(struct ib_device *ibdev,
int mad_flags,
u8 port_num,
struct ib_wc *in_wc,
struct ib_grh *in_grh,
struct ib_mad *in_mad,
struct ib_mad *out_mad)
{
int err;
u8 status;
u16 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
u16 prev_lid = 0;
struct ib_port_attr pattr;
/* Forward locally generated traps to the SM */
if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP &&
slid == 0) {
forward_trap(to_mdev(ibdev), port_num, in_mad);
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
}
/*
* Only handle SM gets, sets, and trap represses for the SM class.
*
* For other classes, only handle PMA and Mellanox vendor-specific
* gets and sets.
*/
if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
in_mad->mad_hdr.method != IB_MGMT_METHOD_SET &&
in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS)
return IB_MAD_RESULT_SUCCESS;
/*
* Don't process SMInfo queries or vendor-specific
* MADs -- the SMA can't handle them.
*/
if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO ||
((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) ==
IB_SMP_ATTR_VENDOR_MASK))
return IB_MAD_RESULT_SUCCESS;
} else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS1 ||
in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS2) {
if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
in_mad->mad_hdr.method != IB_MGMT_METHOD_SET)
return IB_MAD_RESULT_SUCCESS;
} else
return IB_MAD_RESULT_SUCCESS;
if ((in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
in_mad->mad_hdr.method == IB_MGMT_METHOD_SET &&
in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
!ib_query_port(ibdev, port_num, &pattr))
prev_lid = pattr.lid;
err = mthca_MAD_IFC(to_mdev(ibdev),
mad_flags & IB_MAD_IGNORE_MKEY,
mad_flags & IB_MAD_IGNORE_BKEY,
port_num, in_wc, in_grh, in_mad, out_mad,
&status);
if (err) {
mthca_err(to_mdev(ibdev), "MAD_IFC failed\n");
return IB_MAD_RESULT_FAILURE;
}
if (status == MTHCA_CMD_STAT_BAD_PKT)
return IB_MAD_RESULT_SUCCESS;
if (status) {
mthca_err(to_mdev(ibdev), "MAD_IFC returned status %02x\n",
status);
return IB_MAD_RESULT_FAILURE;
}
if (!out_mad->mad_hdr.status) {
smp_snoop(ibdev, port_num, in_mad, prev_lid);
node_desc_override(ibdev, out_mad);
}
/* set return bit in status of directed route responses */
if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
/* no response for trap repress */
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
}
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
{
ib_free_send_mad(mad_send_wc->send_buf);
}
int mthca_create_agents(struct mthca_dev *dev)
{
struct ib_mad_agent *agent;
int p, q;
int ret;
spin_lock_init(&dev->sm_lock);
for (p = 0; p < dev->limits.num_ports; ++p)
for (q = 0; q <= 1; ++q) {
agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
q ? IB_QPT_GSI : IB_QPT_SMI,
NULL, 0, send_handler,
NULL, NULL);
if (IS_ERR(agent)) {
ret = PTR_ERR(agent);
goto err;
}
dev->send_agent[p][q] = agent;
}
for (p = 1; p <= dev->limits.num_ports; ++p) {
ret = mthca_update_rate(dev, p);
if (ret) {
mthca_err(dev, "Failed to obtain port %d rate."
" aborting.\n", p);
goto err;
}
}
return 0;
err:
for (p = 0; p < dev->limits.num_ports; ++p)
for (q = 0; q <= 1; ++q)
if (dev->send_agent[p][q])
ib_unregister_mad_agent(dev->send_agent[p][q]);
return ret;
}
void mthca_free_agents(struct mthca_dev *dev)
{
struct ib_mad_agent *agent;
int p, q;
for (p = 0; p < dev->limits.num_ports; ++p) {
for (q = 0; q <= 1; ++q) {
agent = dev->send_agent[p][q];
dev->send_agent[p][q] = NULL;
ib_unregister_mad_agent(agent);
}
if (dev->sm_ah[p])
ib_destroy_ah(dev->sm_ah[p]);
}
}

File diff suppressed because it is too large

View File

@ -1,372 +0,0 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/string.h>
#include <linux/slab.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
struct mthca_mgm {
__be32 next_gid_index;
u32 reserved[3];
u8 gid[16];
__be32 qp[MTHCA_QP_PER_MGM];
};
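/*
* Layout notes, inferred from the code below: the next AMGM index is
* kept in next_gid_index shifted left by 6 (the low 6 bits are
* reserved), and each qp[] entry carries a QPN in its low 24 bits
* with bit 31 set to mark the entry valid.
*/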
static const u8 zero_gid[16]; /* automatically initialized to 0 */
/*
* Caller must hold MCG table semaphore. gid and mgm parameters must
* be properly aligned for command interface.
*
* Returns 0 unless a firmware command error occurs.
*
* If GID is found in MGM or MGM is empty, *index = *hash, *prev = -1
* and *mgm holds MGM entry.
*
* If GID is found in AMGM, *index = index in AMGM, *prev = index of
* previous entry in hash chain and *mgm holds AMGM entry.
*
* If no AMGM exists for the given GID, *index = -1, *prev = index of
* last entry in hash chain and *mgm holds end of hash chain.
*/
static int find_mgm(struct mthca_dev *dev,
u8 *gid, struct mthca_mailbox *mgm_mailbox,
u16 *hash, int *prev, int *index)
{
struct mthca_mailbox *mailbox;
struct mthca_mgm *mgm = mgm_mailbox->buf;
u8 *mgid;
int err;
u8 status;
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox))
return -ENOMEM;
mgid = mailbox->buf;
memcpy(mgid, gid, 16);
err = mthca_MGID_HASH(dev, mailbox, hash, &status);
if (err)
goto out;
if (status) {
mthca_err(dev, "MGID_HASH returned status %02x\n", status);
err = -EINVAL;
goto out;
}
if (0)
mthca_dbg(dev, "Hash for %pI6 is %04x\n", gid, *hash);
*index = *hash;
*prev = -1;
do {
err = mthca_READ_MGM(dev, *index, mgm_mailbox, &status);
if (err)
goto out;
if (status) {
mthca_err(dev, "READ_MGM returned status %02x\n", status);
err = -EINVAL;
goto out;
}
if (!memcmp(mgm->gid, zero_gid, 16)) {
if (*index != *hash) {
mthca_err(dev, "Found zero MGID in AMGM.\n");
err = -EINVAL;
}
goto out;
}
if (!memcmp(mgm->gid, gid, 16))
goto out;
*prev = *index;
*index = be32_to_cpu(mgm->next_gid_index) >> 6;
} while (*index);
*index = -1;
out:
mthca_free_mailbox(dev, mailbox);
return err;
}
int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mthca_dev *dev = to_mdev(ibqp->device);
struct mthca_mailbox *mailbox;
struct mthca_mgm *mgm;
u16 hash;
int index, prev;
int link = 0;
int i;
int err;
u8 status;
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
mgm = mailbox->buf;
mutex_lock(&dev->mcg_table.mutex);
err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index);
if (err)
goto out;
if (index != -1) {
if (!memcmp(mgm->gid, zero_gid, 16))
memcpy(mgm->gid, gid->raw, 16);
} else {
link = 1;
index = mthca_alloc(&dev->mcg_table.alloc);
if (index == -1) {
mthca_err(dev, "No AMGM entries left\n");
err = -ENOMEM;
goto out;
}
err = mthca_READ_MGM(dev, index, mailbox, &status);
if (err)
goto out;
if (status) {
mthca_err(dev, "READ_MGM returned status %02x\n", status);
err = -EINVAL;
goto out;
}
memset(mgm, 0, sizeof *mgm);
memcpy(mgm->gid, gid->raw, 16);
}
for (i = 0; i < MTHCA_QP_PER_MGM; ++i)
if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1U << 31))) {
mthca_dbg(dev, "QP %06x already a member of MGM\n",
ibqp->qp_num);
err = 0;
goto out;
} else if (!(mgm->qp[i] & cpu_to_be32(1U << 31))) {
mgm->qp[i] = cpu_to_be32(ibqp->qp_num | (1U << 31));
break;
}
if (i == MTHCA_QP_PER_MGM) {
mthca_err(dev, "MGM at index %x is full.\n", index);
err = -ENOMEM;
goto out;
}
err = mthca_WRITE_MGM(dev, index, mailbox, &status);
if (err)
goto out;
if (status) {
mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
err = -EINVAL;
goto out;
}
if (!link)
goto out;
err = mthca_READ_MGM(dev, prev, mailbox, &status);
if (err)
goto out;
if (status) {
mthca_err(dev, "READ_MGM returned status %02x\n", status);
err = -EINVAL;
goto out;
}
mgm->next_gid_index = cpu_to_be32(index << 6);
err = mthca_WRITE_MGM(dev, prev, mailbox, &status);
if (err)
goto out;
if (status) {
mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
err = -EINVAL;
}
out:
if (err && link && index != -1) {
BUG_ON(index < dev->limits.num_mgms);
mthca_free(&dev->mcg_table.alloc, index);
}
mutex_unlock(&dev->mcg_table.mutex);
mthca_free_mailbox(dev, mailbox);
return err;
}
int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mthca_dev *dev = to_mdev(ibqp->device);
struct mthca_mailbox *mailbox;
struct mthca_mgm *mgm;
u16 hash;
int prev, index;
int i, loc;
int err;
u8 status;
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
mgm = mailbox->buf;
mutex_lock(&dev->mcg_table.mutex);
err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index);
if (err)
goto out;
if (index == -1) {
mthca_err(dev, "MGID %pI6 not found\n", gid->raw);
err = -EINVAL;
goto out;
}
for (loc = -1, i = 0; i < MTHCA_QP_PER_MGM; ++i) {
if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1U << 31)))
loc = i;
if (!(mgm->qp[i] & cpu_to_be32(1U << 31)))
break;
}
if (loc == -1) {
mthca_err(dev, "QP %06x not found in MGM\n", ibqp->qp_num);
err = -EINVAL;
goto out;
}
mgm->qp[loc] = mgm->qp[i - 1];
mgm->qp[i - 1] = 0;
err = mthca_WRITE_MGM(dev, index, mailbox, &status);
if (err)
goto out;
if (status) {
mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
err = -EINVAL;
goto out;
}
if (i != 1)
goto out;
if (prev == -1) {
/* Remove entry from MGM */
int amgm_index_to_free = be32_to_cpu(mgm->next_gid_index) >> 6;
if (amgm_index_to_free) {
err = mthca_READ_MGM(dev, amgm_index_to_free,
mailbox, &status);
if (err)
goto out;
if (status) {
mthca_err(dev, "READ_MGM returned status %02x\n",
status);
err = -EINVAL;
goto out;
}
} else
memset(mgm->gid, 0, 16);
err = mthca_WRITE_MGM(dev, index, mailbox, &status);
if (err)
goto out;
if (status) {
mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
err = -EINVAL;
goto out;
}
if (amgm_index_to_free) {
BUG_ON(amgm_index_to_free < dev->limits.num_mgms);
mthca_free(&dev->mcg_table.alloc, amgm_index_to_free);
}
} else {
/* Remove entry from AMGM */
int curr_next_index = be32_to_cpu(mgm->next_gid_index) >> 6;
err = mthca_READ_MGM(dev, prev, mailbox, &status);
if (err)
goto out;
if (status) {
mthca_err(dev, "READ_MGM returned status %02x\n", status);
err = -EINVAL;
goto out;
}
mgm->next_gid_index = cpu_to_be32(curr_next_index << 6);
err = mthca_WRITE_MGM(dev, prev, mailbox, &status);
if (err)
goto out;
if (status) {
mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
err = -EINVAL;
goto out;
}
BUG_ON(index < dev->limits.num_mgms);
mthca_free(&dev->mcg_table.alloc, index);
}
out:
mutex_unlock(&dev->mcg_table.mutex);
mthca_free_mailbox(dev, mailbox);
return err;
}
int mthca_init_mcg_table(struct mthca_dev *dev)
{
int err;
int table_size = dev->limits.num_mgms + dev->limits.num_amgms;
err = mthca_alloc_init(&dev->mcg_table.alloc,
table_size,
table_size - 1,
dev->limits.num_mgms);
if (err)
return err;
mutex_init(&dev->mcg_table.mutex);
return 0;
}
void mthca_cleanup_mcg_table(struct mthca_dev *dev)
{
mthca_alloc_cleanup(&dev->mcg_table.alloc);
}

View File

@ -1,881 +0,0 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/sched.h>
#include <linux/page.h>
#include "mthca_memfree.h"
#include "mthca_dev.h"
#include "mthca_cmd.h"
/*
* We allocate in as big chunks as we can, up to a maximum of 256 KB
* per chunk.
*/
enum {
MTHCA_ICM_ALLOC_SIZE = 1 << 18,
MTHCA_TABLE_CHUNK_SIZE = 1 << 18
};
struct mthca_user_db_table {
struct mutex mutex;
struct {
u64 uvirt;
struct scatterlist mem;
int refcount;
} page[0];
};
static void mthca_free_icm_pages(struct mthca_dev *dev, struct mthca_icm_chunk *chunk)
{
int i;
if (chunk->nsg > 0)
pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages,
PCI_DMA_BIDIRECTIONAL);
for (i = 0; i < chunk->npages; ++i)
__free_pages(sg_page(&chunk->mem[i]),
get_order(chunk->mem[i].length));
}
static void mthca_free_icm_coherent(struct mthca_dev *dev, struct mthca_icm_chunk *chunk)
{
int i;
for (i = 0; i < chunk->npages; ++i) {
dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length,
lowmem_page_address(sg_page(&chunk->mem[i])),
sg_dma_address(&chunk->mem[i]));
}
}
void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm, int coherent)
{
struct mthca_icm_chunk *chunk, *tmp;
if (!icm)
return;
list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) {
if (coherent)
mthca_free_icm_coherent(dev, chunk);
else
mthca_free_icm_pages(dev, chunk);
kfree(chunk);
}
kfree(icm);
}
static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask)
{
struct page *page;
/*
* Use __GFP_ZERO because buggy firmware assumes ICM pages are
* cleared, and subtle failures are seen if they aren't.
*/
page = alloc_pages(gfp_mask | __GFP_ZERO, order);
if (!page)
return -ENOMEM;
sg_set_page(mem, page, PAGE_SIZE << order, 0);
return 0;
}
static int mthca_alloc_icm_coherent(struct device *dev, struct scatterlist *mem,
int order, gfp_t gfp_mask)
{
void *buf = dma_alloc_coherent(dev, PAGE_SIZE << order, &sg_dma_address(mem),
gfp_mask);
if (!buf)
return -ENOMEM;
sg_set_buf(mem, buf, PAGE_SIZE << order);
BUG_ON(mem->offset);
sg_dma_len(mem) = PAGE_SIZE << order;
return 0;
}
struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
gfp_t gfp_mask, int coherent)
{
struct mthca_icm *icm;
struct mthca_icm_chunk *chunk = NULL;
int cur_order;
int ret;
/* We use sg_set_buf for coherent allocs, which assumes low memory */
BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM));
icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
if (!icm)
return icm;
icm->refcount = 0;
INIT_LIST_HEAD(&icm->chunk_list);
cur_order = get_order(MTHCA_ICM_ALLOC_SIZE);
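/* Start at the largest chunk order (MTHCA_ICM_ALLOC_SIZE, 256 KB) and
* fall back to smaller orders whenever an allocation fails. */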
while (npages > 0) {
if (!chunk) {
chunk = kmalloc(sizeof *chunk,
gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
if (!chunk)
goto fail;
sg_init_table(chunk->mem, MTHCA_ICM_CHUNK_LEN);
chunk->npages = 0;
chunk->nsg = 0;
list_add_tail(&chunk->list, &icm->chunk_list);
}
while (1 << cur_order > npages)
--cur_order;
if (coherent)
ret = mthca_alloc_icm_coherent(&dev->pdev->dev,
&chunk->mem[chunk->npages],
cur_order, gfp_mask);
else
ret = mthca_alloc_icm_pages(&chunk->mem[chunk->npages],
cur_order, gfp_mask);
if (!ret) {
++chunk->npages;
if (coherent)
++chunk->nsg;
else if (chunk->npages == MTHCA_ICM_CHUNK_LEN) {
chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
chunk->npages,
PCI_DMA_BIDIRECTIONAL);
if (chunk->nsg <= 0)
goto fail;
}
if (chunk->npages == MTHCA_ICM_CHUNK_LEN)
chunk = NULL;
npages -= 1 << cur_order;
} else {
--cur_order;
if (cur_order < 0)
goto fail;
}
}
if (!coherent && chunk) {
chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
chunk->npages,
PCI_DMA_BIDIRECTIONAL);
if (chunk->nsg <= 0)
goto fail;
}
return icm;
fail:
mthca_free_icm(dev, icm, coherent);
return NULL;
}
int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj)
{
int i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;
int ret = 0;
u8 status;
mutex_lock(&table->mutex);
if (table->icm[i]) {
++table->icm[i]->refcount;
goto out;
}
table->icm[i] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
(table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
__GFP_NOWARN, table->coherent);
if (!table->icm[i]) {
ret = -ENOMEM;
goto out;
}
if (mthca_MAP_ICM(dev, table->icm[i], table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
&status) || status) {
mthca_free_icm(dev, table->icm[i], table->coherent);
table->icm[i] = NULL;
ret = -ENOMEM;
goto out;
}
++table->icm[i]->refcount;
out:
mutex_unlock(&table->mutex);
return ret;
}
void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj)
{
int i;
u8 status;
if (!mthca_is_memfree(dev))
return;
i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;
mutex_lock(&table->mutex);
if (--table->icm[i]->refcount == 0) {
mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
&status);
mthca_free_icm(dev, table->icm[i], table->coherent);
table->icm[i] = NULL;
}
mutex_unlock(&table->mutex);
}
void *mthca_table_find(struct mthca_icm_table *table, int obj, dma_addr_t *dma_handle)
{
int idx, offset, dma_offset, i;
struct mthca_icm_chunk *chunk;
struct mthca_icm *icm;
struct page *page = NULL;
if (!table->lowmem)
return NULL;
mutex_lock(&table->mutex);
idx = (obj & (table->num_obj - 1)) * table->obj_size;
icm = table->icm[idx / MTHCA_TABLE_CHUNK_SIZE];
dma_offset = offset = idx % MTHCA_TABLE_CHUNK_SIZE;
if (!icm)
goto out;
list_for_each_entry(chunk, &icm->chunk_list, list) {
for (i = 0; i < chunk->npages; ++i) {
if (dma_handle && dma_offset >= 0) {
if (sg_dma_len(&chunk->mem[i]) > dma_offset)
*dma_handle = sg_dma_address(&chunk->mem[i]) +
dma_offset;
dma_offset -= sg_dma_len(&chunk->mem[i]);
}
/* DMA mapping can merge pages but not split them,
* so if we found the page, dma_handle has already
* been assigned. */
if (chunk->mem[i].length > offset) {
page = sg_page(&chunk->mem[i]);
goto out;
}
offset -= chunk->mem[i].length;
}
}
out:
mutex_unlock(&table->mutex);
return page ? lowmem_page_address(page) + offset : NULL;
}
int mthca_table_get_range(struct mthca_dev *dev, struct mthca_icm_table *table,
int start, int end)
{
int inc = MTHCA_TABLE_CHUNK_SIZE / table->obj_size;
int i, err;
for (i = start; i <= end; i += inc) {
err = mthca_table_get(dev, table, i);
if (err)
goto fail;
}
return 0;
fail:
while (i > start) {
i -= inc;
mthca_table_put(dev, table, i);
}
return err;
}
void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table,
int start, int end)
{
int i;
if (!mthca_is_memfree(dev))
return;
for (i = start; i <= end; i += MTHCA_TABLE_CHUNK_SIZE / table->obj_size)
mthca_table_put(dev, table, i);
}
struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
u64 virt, int obj_size,
int nobj, int reserved,
int use_lowmem, int use_coherent)
{
struct mthca_icm_table *table;
int obj_per_chunk;
int num_icm;
unsigned chunk_size;
int i;
u8 status;
obj_per_chunk = MTHCA_TABLE_CHUNK_SIZE / obj_size;
num_icm = DIV_ROUND_UP(nobj, obj_per_chunk);
table = kmalloc(sizeof *table + num_icm * sizeof *table->icm, GFP_KERNEL);
if (!table)
return NULL;
table->virt = virt;
table->num_icm = num_icm;
table->num_obj = nobj;
table->obj_size = obj_size;
table->lowmem = use_lowmem;
table->coherent = use_coherent;
mutex_init(&table->mutex);
for (i = 0; i < num_icm; ++i)
table->icm[i] = NULL;
for (i = 0; i * MTHCA_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) {
chunk_size = MTHCA_TABLE_CHUNK_SIZE;
if ((i + 1) * MTHCA_TABLE_CHUNK_SIZE > nobj * obj_size)
chunk_size = nobj * obj_size - i * MTHCA_TABLE_CHUNK_SIZE;
table->icm[i] = mthca_alloc_icm(dev, chunk_size >> PAGE_SHIFT,
(use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
__GFP_NOWARN, use_coherent);
if (!table->icm[i])
goto err;
if (mthca_MAP_ICM(dev, table->icm[i], virt + i * MTHCA_TABLE_CHUNK_SIZE,
&status) || status) {
mthca_free_icm(dev, table->icm[i], table->coherent);
table->icm[i] = NULL;
goto err;
}
/*
* Add a reference to this ICM chunk so that it never
* gets freed (since it contains reserved firmware objects).
*/
++table->icm[i]->refcount;
}
return table;
err:
for (i = 0; i < num_icm; ++i)
if (table->icm[i]) {
mthca_UNMAP_ICM(dev, virt + i * MTHCA_TABLE_CHUNK_SIZE,
MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
&status);
mthca_free_icm(dev, table->icm[i], table->coherent);
}
kfree(table);
return NULL;
}
void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table)
{
int i;
u8 status;
for (i = 0; i < table->num_icm; ++i)
if (table->icm[i]) {
mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
&status);
mthca_free_icm(dev, table->icm[i], table->coherent);
}
kfree(table);
}
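/*
* ICM virtual address of one 4 KB page of a UAR's doorbell context:
* each UAR owns a uarc_size-byte window starting at uarc_base,
* indexed by the UAR number.
*/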
static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int page)
{
return dev->uar_table.uarc_base +
uar->index * dev->uar_table.uarc_size +
page * MTHCA_ICM_PAGE_SIZE;
}
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/pmap.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
struct mthca_user_db_table *db_tab, int index, u64 uaddr)
{
#ifdef __linux__
struct page *pages[1];
int ret = 0;
u8 status;
int i;
if (!mthca_is_memfree(dev))
return 0;
if (index < 0 || index > dev->uar_table.uarc_size / 8)
return -EINVAL;
mutex_lock(&db_tab->mutex);
i = index / MTHCA_DB_REC_PER_PAGE;
if ((db_tab->page[i].refcount >= MTHCA_DB_REC_PER_PAGE) ||
(db_tab->page[i].uvirt && db_tab->page[i].uvirt != uaddr) ||
(uaddr & 4095)) {
ret = -EINVAL;
goto out;
}
if (db_tab->page[i].refcount) {
++db_tab->page[i].refcount;
goto out;
}
ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0,
pages, NULL);
if (ret < 0)
goto out;
sg_set_page(&db_tab->page[i].mem, pages[0], MTHCA_ICM_PAGE_SIZE,
uaddr & ~PAGE_MASK);
ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
if (ret < 0) {
put_page(pages[0]);
goto out;
}
ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem),
mthca_uarc_virt(dev, uar, i), &status);
if (!ret && status)
ret = -EINVAL;
if (ret) {
pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
put_page(sg_page(&db_tab->page[i].mem));
goto out;
}
db_tab->page[i].uvirt = uaddr;
db_tab->page[i].refcount = 1;
out:
mutex_unlock(&db_tab->mutex);
return ret;
#else
struct proc *proc;
vm_offset_t start;
vm_paddr_t paddr;
pmap_t pmap;
vm_page_t m;
int ret = 0;
u8 status;
int i;
if (!mthca_is_memfree(dev))
return 0;
if (index < 0 || index > dev->uar_table.uarc_size / 8)
return -EINVAL;
mutex_lock(&db_tab->mutex);
i = index / MTHCA_DB_REC_PER_PAGE;
start = 0;
if ((db_tab->page[i].refcount >= MTHCA_DB_REC_PER_PAGE) ||
(db_tab->page[i].uvirt && db_tab->page[i].uvirt != uaddr) ||
(uaddr & 4095)) {
ret = -EINVAL;
goto out;
}
if (db_tab->page[i].refcount) {
++db_tab->page[i].refcount;
goto out;
}
proc = curproc;
pmap = vm_map_pmap(&proc->p_vmspace->vm_map);
PROC_LOCK(proc);
if (ptoa(pmap_wired_count(pmap) + 1) >
lim_cur_proc(proc, RLIMIT_MEMLOCK)) {
PROC_UNLOCK(proc);
ret = -ENOMEM;
goto out;
}
PROC_UNLOCK(proc);
if (vm_cnt.v_wire_count + 1 > vm_page_max_wired) {
ret = -EAGAIN;
goto out;
}
start = uaddr & PAGE_MASK;
ret = vm_map_wire(&proc->p_vmspace->vm_map, start, start + PAGE_SIZE,
VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES | VM_MAP_WIRE_WRITE);
if (ret != KERN_SUCCESS) {
start = 0;
ret = -ENOMEM;
goto out;
}
paddr = pmap_extract(pmap, uaddr);
if (paddr == 0) {
ret = -EFAULT;
goto out;
}
m = PHYS_TO_VM_PAGE(paddr);
sg_set_page(&db_tab->page[i].mem, m, MTHCA_ICM_PAGE_SIZE,
uaddr & ~PAGE_MASK);
ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
if (ret < 0)
goto out;
ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem),
mthca_uarc_virt(dev, uar, i), &status);
if (!ret && status)
ret = -EINVAL;
if (ret) {
pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
goto out;
}
db_tab->page[i].uvirt = uaddr;
db_tab->page[i].refcount = 1;
out:
if (ret < 0 && start)
vm_map_unwire(&curthread->td_proc->p_vmspace->vm_map,
start, start + PAGE_SIZE,
VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
mutex_unlock(&db_tab->mutex);
return ret;
#endif
}
void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
struct mthca_user_db_table *db_tab, int index)
{
if (!mthca_is_memfree(dev))
return;
/*
* To make our bookkeeping simpler, we don't unmap DB
* pages until we clean up the whole db table.
*/
mutex_lock(&db_tab->mutex);
--db_tab->page[index / MTHCA_DB_REC_PER_PAGE].refcount;
mutex_unlock(&db_tab->mutex);
}
struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev)
{
struct mthca_user_db_table *db_tab;
int npages;
int i;
if (!mthca_is_memfree(dev))
return NULL;
npages = dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE;
db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page, GFP_KERNEL);
if (!db_tab)
return ERR_PTR(-ENOMEM);
mutex_init(&db_tab->mutex);
for (i = 0; i < npages; ++i) {
db_tab->page[i].refcount = 0;
db_tab->page[i].uvirt = 0;
sg_init_table(&db_tab->page[i].mem, 1);
}
return db_tab;
}
void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
struct mthca_user_db_table *db_tab)
{
int i;
u8 status;
if (!mthca_is_memfree(dev))
return;
for (i = 0; i < dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE; ++i) {
if (db_tab->page[i].uvirt) {
mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status);
pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
#ifdef __linux__
put_page(sg_page(&db_tab->page[i].mem));
#else
vm_offset_t start;
start = db_tab->page[i].uvirt & PAGE_MASK;
vm_map_unwire(&curthread->td_proc->p_vmspace->vm_map,
start, start + PAGE_SIZE,
VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#endif
}
}
kfree(db_tab);
}
int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
u32 qn, __be32 **db)
{
int group;
int start, end, dir;
int i, j;
struct mthca_db_page *page;
int ret = 0;
u8 status;
mutex_lock(&dev->db_tab->mutex);
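/*
* Group 0 records (CQ arm, SQ) grow up from the first UARC page and
* group 1 records (CQ set_ci, RQ, SRQ) grow down from the last, so
* the two groups share the page range without colliding.
*/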
switch (type) {
case MTHCA_DB_TYPE_CQ_ARM:
case MTHCA_DB_TYPE_SQ:
group = 0;
start = 0;
end = dev->db_tab->max_group1;
dir = 1;
break;
case MTHCA_DB_TYPE_CQ_SET_CI:
case MTHCA_DB_TYPE_RQ:
case MTHCA_DB_TYPE_SRQ:
group = 1;
start = dev->db_tab->npages - 1;
end = dev->db_tab->min_group2;
dir = -1;
break;
default:
ret = -EINVAL;
goto out;
}
for (i = start; i != end; i += dir)
if (dev->db_tab->page[i].db_rec &&
!bitmap_full(dev->db_tab->page[i].used,
MTHCA_DB_REC_PER_PAGE)) {
page = dev->db_tab->page + i;
goto found;
}
for (i = start; i != end; i += dir)
if (!dev->db_tab->page[i].db_rec) {
page = dev->db_tab->page + i;
goto alloc;
}
if (dev->db_tab->max_group1 >= dev->db_tab->min_group2 - 1) {
ret = -ENOMEM;
goto out;
}
if (group == 0)
++dev->db_tab->max_group1;
else
--dev->db_tab->min_group2;
page = dev->db_tab->page + end;
alloc:
page->db_rec = dma_alloc_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
&page->mapping, GFP_KERNEL);
if (!page->db_rec) {
ret = -ENOMEM;
goto out;
}
memset(page->db_rec, 0, MTHCA_ICM_PAGE_SIZE);
ret = mthca_MAP_ICM_page(dev, page->mapping,
mthca_uarc_virt(dev, &dev->driver_uar, i), &status);
if (!ret && status)
ret = -EINVAL;
if (ret) {
dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
page->db_rec, page->mapping);
goto out;
}
bitmap_zero(page->used, MTHCA_DB_REC_PER_PAGE);
found:
j = find_first_zero_bit(page->used, MTHCA_DB_REC_PER_PAGE);
set_bit(j, page->used);
if (group == 1)
j = MTHCA_DB_REC_PER_PAGE - 1 - j;
ret = i * MTHCA_DB_REC_PER_PAGE + j;
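/* Doorbell record layout: resource number in bits 31:8, record type in bits 7:5. */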
page->db_rec[j] = cpu_to_be64((qn << 8) | (type << 5));
*db = (__be32 *) &page->db_rec[j];
out:
mutex_unlock(&dev->db_tab->mutex);
return ret;
}
void mthca_free_db(struct mthca_dev *dev, int type, int db_index)
{
int i, j;
struct mthca_db_page *page;
u8 status;
i = db_index / MTHCA_DB_REC_PER_PAGE;
j = db_index % MTHCA_DB_REC_PER_PAGE;
page = dev->db_tab->page + i;
mutex_lock(&dev->db_tab->mutex);
page->db_rec[j] = 0;
if (i >= dev->db_tab->min_group2)
j = MTHCA_DB_REC_PER_PAGE - 1 - j;
clear_bit(j, page->used);
if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) &&
i >= dev->db_tab->max_group1 - 1) {
mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);
dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
page->db_rec, page->mapping);
page->db_rec = NULL;
if (i == dev->db_tab->max_group1) {
--dev->db_tab->max_group1;
/* XXX may be able to unmap more pages now */
}
if (i == dev->db_tab->min_group2)
++dev->db_tab->min_group2;
}
mutex_unlock(&dev->db_tab->mutex);
}
int mthca_init_db_tab(struct mthca_dev *dev)
{
int i;
if (!mthca_is_memfree(dev))
return 0;
dev->db_tab = kmalloc(sizeof *dev->db_tab, GFP_KERNEL);
if (!dev->db_tab)
return -ENOMEM;
mutex_init(&dev->db_tab->mutex);
dev->db_tab->npages = dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE;
dev->db_tab->max_group1 = 0;
dev->db_tab->min_group2 = dev->db_tab->npages - 1;
dev->db_tab->page = kmalloc(dev->db_tab->npages *
sizeof *dev->db_tab->page,
GFP_KERNEL);
if (!dev->db_tab->page) {
kfree(dev->db_tab);
return -ENOMEM;
}
for (i = 0; i < dev->db_tab->npages; ++i)
dev->db_tab->page[i].db_rec = NULL;
return 0;
}
void mthca_cleanup_db_tab(struct mthca_dev *dev)
{
int i;
u8 status;
if (!mthca_is_memfree(dev))
return;
/*
* Because we don't always free UARC pages when they become
* empty (to keep mthca_free_db() simpler), we need to sweep
* through the doorbell pages and free any leftover pages now.
*/
for (i = 0; i < dev->db_tab->npages; ++i) {
if (!dev->db_tab->page[i].db_rec)
continue;
if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE))
mthca_warn(dev, "Kernel UARC page %d not empty\n", i);
mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);
dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
dev->db_tab->page[i].db_rec,
dev->db_tab->page[i].mapping);
}
kfree(dev->db_tab->page);
kfree(dev->db_tab);
}

View File

@ -1,179 +0,0 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MTHCA_MEMFREE_H
#define MTHCA_MEMFREE_H
#include <linux/list.h>
#include <linux/mutex.h>
#define MTHCA_ICM_CHUNK_LEN \
((256 - sizeof (struct list_head) - 2 * sizeof (int)) / \
(sizeof (struct scatterlist)))
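/*
* The scatterlist array is sized so that struct mthca_icm_chunk fits
* in roughly 256 bytes after the list_head and the two int counters.
*/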
enum {
MTHCA_ICM_PAGE_SHIFT = 12,
MTHCA_ICM_PAGE_SIZE = 1 << MTHCA_ICM_PAGE_SHIFT,
MTHCA_DB_REC_PER_PAGE = MTHCA_ICM_PAGE_SIZE / 8
};
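/* Each doorbell record is an 8-byte __be64, so one 4 KB ICM page holds 512 of them. */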
struct mthca_icm_chunk {
struct list_head list;
int npages;
int nsg;
struct scatterlist mem[MTHCA_ICM_CHUNK_LEN];
};
struct mthca_icm {
struct list_head chunk_list;
int refcount;
};
struct mthca_icm_table {
u64 virt;
int num_icm;
int num_obj;
int obj_size;
int lowmem;
int coherent;
struct mutex mutex;
struct mthca_icm *icm[0];
};
struct mthca_icm_iter {
struct mthca_icm *icm;
struct mthca_icm_chunk *chunk;
int page_idx;
};
struct mthca_dev;
struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
gfp_t gfp_mask, int coherent);
void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm, int coherent);
struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
u64 virt, int obj_size,
int nobj, int reserved,
int use_lowmem, int use_coherent);
void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table);
int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj);
void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj);
void *mthca_table_find(struct mthca_icm_table *table, int obj, dma_addr_t *dma_handle);
int mthca_table_get_range(struct mthca_dev *dev, struct mthca_icm_table *table,
int start, int end);
void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table,
int start, int end);
static inline void mthca_icm_first(struct mthca_icm *icm,
struct mthca_icm_iter *iter)
{
iter->icm = icm;
iter->chunk = list_empty(&icm->chunk_list) ?
NULL : list_entry(icm->chunk_list.next,
struct mthca_icm_chunk, list);
iter->page_idx = 0;
}
static inline int mthca_icm_last(struct mthca_icm_iter *iter)
{
return !iter->chunk;
}
static inline void mthca_icm_next(struct mthca_icm_iter *iter)
{
if (++iter->page_idx >= iter->chunk->nsg) {
if (iter->chunk->list.next == &iter->icm->chunk_list) {
iter->chunk = NULL;
return;
}
iter->chunk = list_entry(iter->chunk->list.next,
struct mthca_icm_chunk, list);
iter->page_idx = 0;
}
}
static inline dma_addr_t mthca_icm_addr(struct mthca_icm_iter *iter)
{
return sg_dma_address(&iter->chunk->mem[iter->page_idx]);
}
static inline unsigned long mthca_icm_size(struct mthca_icm_iter *iter)
{
return sg_dma_len(&iter->chunk->mem[iter->page_idx]);
}
struct mthca_db_page {
DECLARE_BITMAP(used, MTHCA_DB_REC_PER_PAGE);
__be64 *db_rec;
dma_addr_t mapping;
};
struct mthca_db_table {
int npages;
int max_group1;
int min_group2;
struct mthca_db_page *page;
struct mutex mutex;
};
enum mthca_db_type {
MTHCA_DB_TYPE_INVALID = 0x0,
MTHCA_DB_TYPE_CQ_SET_CI = 0x1,
MTHCA_DB_TYPE_CQ_ARM = 0x2,
MTHCA_DB_TYPE_SQ = 0x3,
MTHCA_DB_TYPE_RQ = 0x4,
MTHCA_DB_TYPE_SRQ = 0x5,
MTHCA_DB_TYPE_GROUP_SEP = 0x7
};
struct mthca_user_db_table;
struct mthca_uar;
int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
struct mthca_user_db_table *db_tab, int index, u64 uaddr);
void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
struct mthca_user_db_table *db_tab, int index);
struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev);
void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
struct mthca_user_db_table *db_tab);
int mthca_init_db_tab(struct mthca_dev *dev);
void mthca_cleanup_db_tab(struct mthca_dev *dev);
int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
u32 qn, __be32 **db);
void mthca_free_db(struct mthca_dev *dev, int type, int db_index);
#endif /* MTHCA_MEMFREE_H */

View File

@ -1,985 +0,0 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/slab.h>
#include <linux/errno.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
#include "mthca_memfree.h"
struct mthca_mtt {
struct mthca_buddy *buddy;
int order;
u32 first_seg;
};
/*
* Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
*/
struct mthca_mpt_entry {
__be32 flags;
__be32 page_size;
__be32 key;
__be32 pd;
__be64 start;
__be64 length;
__be32 lkey;
__be32 window_count;
__be32 window_count_limit;
__be64 mtt_seg;
__be32 mtt_sz; /* Arbel only */
u32 reserved[2];
} __attribute__((packed));
#define MTHCA_MPT_FLAG_SW_OWNS (0xfUL << 28)
#define MTHCA_MPT_FLAG_MIO (1 << 17)
#define MTHCA_MPT_FLAG_BIND_ENABLE (1 << 15)
#define MTHCA_MPT_FLAG_PHYSICAL (1 << 9)
#define MTHCA_MPT_FLAG_REGION (1 << 8)
#define MTHCA_MTT_FLAG_PRESENT 1
#define MTHCA_MPT_STATUS_SW 0xF0
#define MTHCA_MPT_STATUS_HW 0x00
#define SINAI_FMR_KEY_INC 0x1000000
/*
* Buddy allocator for MTT segments (currently not very efficient
* since it doesn't keep a free list and just searches linearly
* through the bitmaps)
*/
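/*
* Example with max_order = 2 and only the order-2 block free: an
* order-0 request clears the order-2 bit, marks the order-1 buddy
* (segments 2-3) and the order-0 buddy (segment 1) free on the way
* down, and returns segment 0.
*/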
static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order)
{
int o;
int m;
u32 seg;
spin_lock(&buddy->lock);
for (o = order; o <= buddy->max_order; ++o)
if (buddy->num_free[o]) {
m = 1 << (buddy->max_order - o);
seg = find_first_bit(buddy->bits[o], m);
if (seg < m)
goto found;
}
spin_unlock(&buddy->lock);
return -1;
found:
clear_bit(seg, buddy->bits[o]);
--buddy->num_free[o];
while (o > order) {
--o;
seg <<= 1;
set_bit(seg ^ 1, buddy->bits[o]);
++buddy->num_free[o];
}
spin_unlock(&buddy->lock);
seg <<= order;
return seg;
}
static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order)
{
seg >>= order;
spin_lock(&buddy->lock);
while (test_bit(seg ^ 1, buddy->bits[order])) {
clear_bit(seg ^ 1, buddy->bits[order]);
--buddy->num_free[order];
seg >>= 1;
++order;
}
set_bit(seg, buddy->bits[order]);
++buddy->num_free[order];
spin_unlock(&buddy->lock);
}
static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
{
int i, s;
buddy->max_order = max_order;
spin_lock_init(&buddy->lock);
buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *),
GFP_KERNEL);
buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int),
GFP_KERNEL);
if (!buddy->bits || !buddy->num_free)
goto err_out;
for (i = 0; i <= buddy->max_order; ++i) {
s = BITS_TO_LONGS(1 << (buddy->max_order - i));
buddy->bits[i] = kmalloc(s * sizeof (long), GFP_KERNEL);
if (!buddy->bits[i])
goto err_out_free;
bitmap_zero(buddy->bits[i],
1 << (buddy->max_order - i));
}
set_bit(0, buddy->bits[buddy->max_order]);
buddy->num_free[buddy->max_order] = 1;
return 0;
err_out_free:
for (i = 0; i <= buddy->max_order; ++i)
kfree(buddy->bits[i]);
err_out:
kfree(buddy->bits);
kfree(buddy->num_free);
return -ENOMEM;
}
static void mthca_buddy_cleanup(struct mthca_buddy *buddy)
{
int i;
for (i = 0; i <= buddy->max_order; ++i)
kfree(buddy->bits[i]);
kfree(buddy->bits);
kfree(buddy->num_free);
}
static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order,
struct mthca_buddy *buddy)
{
u32 seg = mthca_buddy_alloc(buddy, order);
if (seg == -1)
return -1;
if (mthca_is_memfree(dev))
if (mthca_table_get_range(dev, dev->mr_table.mtt_table, seg,
seg + (1 << order) - 1)) {
mthca_buddy_free(buddy, seg, order);
seg = -1;
}
return seg;
}
static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size,
struct mthca_buddy *buddy)
{
struct mthca_mtt *mtt;
int i;
if (size <= 0)
return ERR_PTR(-EINVAL);
mtt = kmalloc(sizeof *mtt, GFP_KERNEL);
if (!mtt)
return ERR_PTR(-ENOMEM);
mtt->buddy = buddy;
mtt->order = 0;
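/* One segment holds mtt_seg_size / 8 eight-byte MTT entries; find the
* smallest power-of-two number of segments covering 'size' entries. */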
for (i = dev->limits.mtt_seg_size / 8; i < size; i <<= 1)
++mtt->order;
mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy);
if (mtt->first_seg == -1) {
kfree(mtt);
return ERR_PTR(-ENOMEM);
}
return mtt;
}
struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size)
{
return __mthca_alloc_mtt(dev, size, &dev->mr_table.mtt_buddy);
}
void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt)
{
if (!mtt)
return;
mthca_buddy_free(mtt->buddy, mtt->first_seg, mtt->order);
mthca_table_put_range(dev, dev->mr_table.mtt_table,
mtt->first_seg,
mtt->first_seg + (1 << mtt->order) - 1);
kfree(mtt);
}
static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
int start_index, u64 *buffer_list, int list_len)
{
struct mthca_mailbox *mailbox;
__be64 *mtt_entry;
int err = 0;
u8 status;
int i;
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
mtt_entry = mailbox->buf;
while (list_len > 0) {
mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
mtt->first_seg * dev->limits.mtt_seg_size +
start_index * 8);
mtt_entry[1] = 0;
for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i)
mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] |
MTHCA_MTT_FLAG_PRESENT);
/*
* If we have an odd number of entries to write, add
* one more dummy entry for firmware efficiency.
*/
if (i & 1)
mtt_entry[i + 2] = 0;
err = mthca_WRITE_MTT(dev, mailbox, (i + 1) & ~1, &status);
if (err) {
mthca_warn(dev, "WRITE_MTT failed (%d)\n", err);
goto out;
}
if (status) {
mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n",
status);
err = -EINVAL;
goto out;
}
list_len -= i;
start_index += i;
buffer_list += i;
}
out:
mthca_free_mailbox(dev, mailbox);
return err;
}
int mthca_write_mtt_size(struct mthca_dev *dev)
{
if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy ||
!(dev->mthca_flags & MTHCA_FLAG_FMR))
/*
* Be friendly to WRITE_MTT command
* and leave two empty slots for the
* index and reserved fields of the
* mailbox.
*/
return PAGE_SIZE / sizeof (u64) - 2;
/* For Arbel, all MTTs must fit in the same page. */
return mthca_is_memfree(dev) ? (PAGE_SIZE / sizeof (u64)) : 0x7ffffff;
}
static void mthca_tavor_write_mtt_seg(struct mthca_dev *dev,
struct mthca_mtt *mtt, int start_index,
u64 *buffer_list, int list_len)
{
u64 __iomem *mtts;
int i;
mtts = dev->mr_table.tavor_fmr.mtt_base + mtt->first_seg * dev->limits.mtt_seg_size +
start_index * sizeof (u64);
for (i = 0; i < list_len; ++i)
mthca_write64_raw(cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT),
mtts + i);
}
static void mthca_arbel_write_mtt_seg(struct mthca_dev *dev,
struct mthca_mtt *mtt, int start_index,
u64 *buffer_list, int list_len)
{
__be64 *mtts;
dma_addr_t dma_handle;
int i;
int s = start_index * sizeof (u64);
/* For Arbel, all MTTs must fit in the same page. */
BUG_ON(s / PAGE_SIZE != (s + list_len * sizeof(u64) - 1) / PAGE_SIZE);
/* Require full segments */
BUG_ON(s % dev->limits.mtt_seg_size);
mtts = mthca_table_find(dev->mr_table.mtt_table, mtt->first_seg +
s / dev->limits.mtt_seg_size, &dma_handle);
BUG_ON(!mtts);
for (i = 0; i < list_len; ++i)
mtts[i] = cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT);
dma_sync_single(&dev->pdev->dev, dma_handle, list_len * sizeof (u64), DMA_TO_DEVICE);
}
int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
int start_index, u64 *buffer_list, int list_len)
{
int size = mthca_write_mtt_size(dev);
int chunk;
if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy ||
!(dev->mthca_flags & MTHCA_FLAG_FMR))
return __mthca_write_mtt(dev, mtt, start_index, buffer_list, list_len);
while (list_len > 0) {
chunk = min(size, list_len);
if (mthca_is_memfree(dev))
mthca_arbel_write_mtt_seg(dev, mtt, start_index,
buffer_list, chunk);
else
mthca_tavor_write_mtt_seg(dev, mtt, start_index,
buffer_list, chunk);
list_len -= chunk;
start_index += chunk;
buffer_list += chunk;
}
return 0;
}
static inline u32 tavor_hw_index_to_key(u32 ind)
{
return ind;
}
static inline u32 tavor_key_to_hw_index(u32 key)
{
return key;
}
static inline u32 arbel_hw_index_to_key(u32 ind)
{
return (ind >> 24) | (ind << 8);
}
static inline u32 arbel_key_to_hw_index(u32 key)
{
return (key << 24) | (key >> 8);
}
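/*
* (key << 24) | (key >> 8) is a 32-bit rotate right by 8, and
* (ind >> 24) | (ind << 8) is the inverse rotate left, so the two
* Arbel conversions above are exact inverses of each other.
*/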
static inline u32 hw_index_to_key(struct mthca_dev *dev, u32 ind)
{
if (mthca_is_memfree(dev))
return arbel_hw_index_to_key(ind);
else
return tavor_hw_index_to_key(ind);
}
static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key)
{
if (mthca_is_memfree(dev))
return arbel_key_to_hw_index(key);
else
return tavor_key_to_hw_index(key);
}
static inline u32 adjust_key(struct mthca_dev *dev, u32 key)
{
if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
return ((key << 20) & 0x800000) | (key & 0x7fffff);
else
return key;
}
int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
u64 iova, u64 total_size, u32 access, struct mthca_mr *mr)
{
struct mthca_mailbox *mailbox;
struct mthca_mpt_entry *mpt_entry;
u32 key;
int i;
int err;
u8 status;
WARN_ON(buffer_size_shift >= 32);
key = mthca_alloc(&dev->mr_table.mpt_alloc);
if (key == -1)
return -ENOMEM;
key = adjust_key(dev, key);
mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
if (mthca_is_memfree(dev)) {
err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
if (err)
goto err_out_mpt_free;
}
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox)) {
err = PTR_ERR(mailbox);
goto err_out_table;
}
mpt_entry = mailbox->buf;
mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS |
MTHCA_MPT_FLAG_MIO |
MTHCA_MPT_FLAG_REGION |
access);
if (!mr->mtt)
mpt_entry->flags |= cpu_to_be32(MTHCA_MPT_FLAG_PHYSICAL);
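/* The MPT page_size field is log2 of the buffer page size minus 12. */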
mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12);
mpt_entry->key = cpu_to_be32(key);
mpt_entry->pd = cpu_to_be32(pd);
mpt_entry->start = cpu_to_be64(iova);
mpt_entry->length = cpu_to_be64(total_size);
memset(&mpt_entry->lkey, 0,
sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));
if (mr->mtt)
mpt_entry->mtt_seg =
cpu_to_be64(dev->mr_table.mtt_base +
mr->mtt->first_seg * dev->limits.mtt_seg_size);
if (0) {
mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
if (i % 4 == 0)
printk("[%02x] ", i * 4);
printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i]));
if ((i + 1) % 4 == 0)
printk("\n");
}
}
err = mthca_SW2HW_MPT(dev, mailbox,
key & (dev->limits.num_mpts - 1),
&status);
if (err) {
mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
goto err_out_mailbox;
} else if (status) {
mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
status);
err = -EINVAL;
goto err_out_mailbox;
}
mthca_free_mailbox(dev, mailbox);
return err;
err_out_mailbox:
mthca_free_mailbox(dev, mailbox);
err_out_table:
mthca_table_put(dev, dev->mr_table.mpt_table, key);
err_out_mpt_free:
mthca_free(&dev->mr_table.mpt_alloc, key);
return err;
}
int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
u32 access, struct mthca_mr *mr)
{
mr->mtt = NULL;
return mthca_mr_alloc(dev, pd, 12, 0, ~0ULL, access, mr);
}
int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
u64 *buffer_list, int buffer_size_shift,
int list_len, u64 iova, u64 total_size,
u32 access, struct mthca_mr *mr)
{
int err;
mr->mtt = mthca_alloc_mtt(dev, list_len);
if (IS_ERR(mr->mtt))
return PTR_ERR(mr->mtt);
err = mthca_write_mtt(dev, mr->mtt, 0, buffer_list, list_len);
if (err) {
mthca_free_mtt(dev, mr->mtt);
return err;
}
err = mthca_mr_alloc(dev, pd, buffer_size_shift, iova,
total_size, access, mr);
if (err)
mthca_free_mtt(dev, mr->mtt);
return err;
}
/* Free mr or fmr */
static void mthca_free_region(struct mthca_dev *dev, u32 lkey)
{
mthca_table_put(dev, dev->mr_table.mpt_table,
key_to_hw_index(dev, lkey));
mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey));
}
void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
{
int err;
u8 status;
err = mthca_HW2SW_MPT(dev, NULL,
key_to_hw_index(dev, mr->ibmr.lkey) &
(dev->limits.num_mpts - 1),
&status);
if (err)
mthca_warn(dev, "HW2SW_MPT failed (%d)\n", err);
else if (status)
mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n",
status);
mthca_free_region(dev, mr->ibmr.lkey);
mthca_free_mtt(dev, mr->mtt);
}
int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
u32 access, struct mthca_fmr *mr)
{
struct mthca_mpt_entry *mpt_entry;
struct mthca_mailbox *mailbox;
u64 mtt_seg;
u32 key, idx;
u8 status;
int list_len = mr->attr.max_pages;
int err = -ENOMEM;
int i;
if (mr->attr.page_shift < 12 || mr->attr.page_shift >= 32)
return -EINVAL;
/* For Arbel, all MTTs must fit in the same page. */
if (mthca_is_memfree(dev) &&
mr->attr.max_pages * sizeof *mr->mem.arbel.mtts > PAGE_SIZE)
return -EINVAL;
mr->maps = 0;
key = mthca_alloc(&dev->mr_table.mpt_alloc);
if (key == -1)
return -ENOMEM;
key = adjust_key(dev, key);
idx = key & (dev->limits.num_mpts - 1);
mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
if (mthca_is_memfree(dev)) {
err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
if (err)
goto err_out_mpt_free;
mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key, NULL);
BUG_ON(!mr->mem.arbel.mpt);
} else
mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base +
sizeof *(mr->mem.tavor.mpt) * idx;
mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy);
if (IS_ERR(mr->mtt)) {
err = PTR_ERR(mr->mtt);
goto err_out_table;
}
mtt_seg = mr->mtt->first_seg * dev->limits.mtt_seg_size;
if (mthca_is_memfree(dev)) {
mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
mr->mtt->first_seg,
&mr->mem.arbel.dma_handle);
BUG_ON(!mr->mem.arbel.mtts);
} else
mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg;
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox)) {
err = PTR_ERR(mailbox);
goto err_out_free_mtt;
}
mpt_entry = mailbox->buf;
mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS |
MTHCA_MPT_FLAG_MIO |
MTHCA_MPT_FLAG_REGION |
access);
mpt_entry->page_size = cpu_to_be32(mr->attr.page_shift - 12);
mpt_entry->key = cpu_to_be32(key);
mpt_entry->pd = cpu_to_be32(pd);
memset(&mpt_entry->start, 0,
sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, start));
mpt_entry->mtt_seg = cpu_to_be64(dev->mr_table.mtt_base + mtt_seg);
if (0) {
mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
if (i % 4 == 0)
printk("[%02x] ", i * 4);
printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i]));
if ((i + 1) % 4 == 0)
printk("\n");
}
}
err = mthca_SW2HW_MPT(dev, mailbox,
key & (dev->limits.num_mpts - 1),
&status);
if (err) {
mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
goto err_out_mailbox_free;
}
if (status) {
mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
status);
err = -EINVAL;
goto err_out_mailbox_free;
}
mthca_free_mailbox(dev, mailbox);
return 0;
err_out_mailbox_free:
mthca_free_mailbox(dev, mailbox);
err_out_free_mtt:
mthca_free_mtt(dev, mr->mtt);
err_out_table:
mthca_table_put(dev, dev->mr_table.mpt_table, key);
err_out_mpt_free:
mthca_free(&dev->mr_table.mpt_alloc, key);
return err;
}
int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr)
{
if (fmr->maps)
return -EBUSY;
mthca_free_region(dev, fmr->ibmr.lkey);
mthca_free_mtt(dev, fmr->mtt);
return 0;
}
static inline int mthca_check_fmr(struct mthca_fmr *fmr, u64 *page_list,
int list_len, u64 iova)
{
int i, page_mask;
if (list_len > fmr->attr.max_pages)
return -EINVAL;
page_mask = (1 << fmr->attr.page_shift) - 1;
/* We are getting page lists, so va must be page aligned. */
if (iova & page_mask)
return -EINVAL;
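	/*
	 * Editorial example (values invented): with page_shift = 12 the
	 * mask above is 0xfff, so an iova of 0x12345000 passes the
	 * check while 0x12345678 is rejected.
	 */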
/* Trust the user not to pass misaligned data in page_list */
if (0)
for (i = 0; i < list_len; ++i) {
if (page_list[i] & ~page_mask)
return -EINVAL;
}
if (fmr->maps >= fmr->attr.max_maps)
return -EINVAL;
return 0;
}
int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
int list_len, u64 iova)
{
struct mthca_fmr *fmr = to_mfmr(ibfmr);
struct mthca_dev *dev = to_mdev(ibfmr->device);
struct mthca_mpt_entry mpt_entry;
u32 key;
int i, err;
err = mthca_check_fmr(fmr, page_list, list_len, iova);
if (err)
return err;
++fmr->maps;
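	/*
	 * Editorial note: the remap below advances the key while keeping
	 * the same MPT index (key modulo num_mpts), so the FMR gets a
	 * fresh lkey/rkey and stale keys handed out for a previous
	 * mapping no longer match.
	 */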
key = tavor_key_to_hw_index(fmr->ibmr.lkey);
key += dev->limits.num_mpts;
fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key);
writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);
for (i = 0; i < list_len; ++i) {
__be64 mtt_entry = cpu_to_be64(page_list[i] |
MTHCA_MTT_FLAG_PRESENT);
mthca_write64_raw(mtt_entry, fmr->mem.tavor.mtts + i);
}
mpt_entry.lkey = cpu_to_be32(key);
mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift));
mpt_entry.start = cpu_to_be64(iova);
__raw_writel((__force u32) mpt_entry.lkey, &fmr->mem.tavor.mpt->key);
memcpy_toio(&fmr->mem.tavor.mpt->start, &mpt_entry.start,
offsetof(struct mthca_mpt_entry, window_count) -
offsetof(struct mthca_mpt_entry, start));
writeb(MTHCA_MPT_STATUS_HW, fmr->mem.tavor.mpt);
return 0;
}
int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
int list_len, u64 iova)
{
struct mthca_fmr *fmr = to_mfmr(ibfmr);
struct mthca_dev *dev = to_mdev(ibfmr->device);
u32 key;
int i, err;
err = mthca_check_fmr(fmr, page_list, list_len, iova);
if (err)
return err;
++fmr->maps;
key = arbel_key_to_hw_index(fmr->ibmr.lkey);
if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
key += SINAI_FMR_KEY_INC;
else
key += dev->limits.num_mpts;
fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
wmb();
for (i = 0; i < list_len; ++i)
fmr->mem.arbel.mtts[i] = cpu_to_be64(page_list[i] |
MTHCA_MTT_FLAG_PRESENT);
dma_sync_single(&dev->pdev->dev, fmr->mem.arbel.dma_handle,
list_len * sizeof(u64), DMA_TO_DEVICE);
fmr->mem.arbel.mpt->key = cpu_to_be32(key);
fmr->mem.arbel.mpt->lkey = cpu_to_be32(key);
fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift));
fmr->mem.arbel.mpt->start = cpu_to_be64(iova);
wmb();
*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_HW;
wmb();
return 0;
}
void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
{
if (!fmr->maps)
return;
fmr->maps = 0;
writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);
}
void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
{
if (!fmr->maps)
return;
fmr->maps = 0;
*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
}
int mthca_init_mr_table(struct mthca_dev *dev)
{
unsigned long addr;
int mpts, mtts, err, i;
err = mthca_alloc_init(&dev->mr_table.mpt_alloc,
dev->limits.num_mpts,
~0, dev->limits.reserved_mrws);
if (err)
return err;
if (!mthca_is_memfree(dev) &&
(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN))
dev->limits.fmr_reserved_mtts = 0;
else
dev->mthca_flags |= MTHCA_FLAG_FMR;
if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
mthca_dbg(dev, "Memory key throughput optimization activated.\n");
err = mthca_buddy_init(&dev->mr_table.mtt_buddy,
fls(dev->limits.num_mtt_segs - 1));
if (err)
goto err_mtt_buddy;
dev->mr_table.tavor_fmr.mpt_base = NULL;
dev->mr_table.tavor_fmr.mtt_base = NULL;
if (dev->limits.fmr_reserved_mtts) {
i = fls(dev->limits.fmr_reserved_mtts - 1);
if (i >= 31) {
mthca_warn(dev, "Unable to reserve 2^31 FMR MTTs.\n");
err = -EINVAL;
goto err_fmr_mpt;
}
mpts = mtts = 1 << i;
} else {
mtts = dev->limits.num_mtt_segs;
mpts = dev->limits.num_mpts;
}
if (!mthca_is_memfree(dev) &&
(dev->mthca_flags & MTHCA_FLAG_FMR)) {
addr = pci_resource_start(dev->pdev, 4) +
((pci_resource_len(dev->pdev, 4) - 1) &
dev->mr_table.mpt_base);
dev->mr_table.tavor_fmr.mpt_base =
ioremap(addr, mpts * sizeof(struct mthca_mpt_entry));
if (!dev->mr_table.tavor_fmr.mpt_base) {
mthca_warn(dev, "MPT ioremap for FMR failed.\n");
err = -ENOMEM;
goto err_fmr_mpt;
}
addr = pci_resource_start(dev->pdev, 4) +
((pci_resource_len(dev->pdev, 4) - 1) &
dev->mr_table.mtt_base);
dev->mr_table.tavor_fmr.mtt_base =
ioremap(addr, mtts * dev->limits.mtt_seg_size);
if (!dev->mr_table.tavor_fmr.mtt_base) {
mthca_warn(dev, "MTT ioremap for FMR failed.\n");
err = -ENOMEM;
goto err_fmr_mtt;
}
}
if (dev->limits.fmr_reserved_mtts) {
err = mthca_buddy_init(&dev->mr_table.tavor_fmr.mtt_buddy, fls(mtts - 1));
if (err)
goto err_fmr_mtt_buddy;
/* Prevent regular MRs from using FMR keys */
err = mthca_buddy_alloc(&dev->mr_table.mtt_buddy, fls(mtts - 1));
if (err)
goto err_reserve_fmr;
dev->mr_table.fmr_mtt_buddy =
&dev->mr_table.tavor_fmr.mtt_buddy;
} else
dev->mr_table.fmr_mtt_buddy = &dev->mr_table.mtt_buddy;
/* FMR table is always the first, take reserved MTTs out of there */
if (dev->limits.reserved_mtts) {
i = fls(dev->limits.reserved_mtts - 1);
if (mthca_alloc_mtt_range(dev, i,
dev->mr_table.fmr_mtt_buddy) == -1) {
mthca_warn(dev, "MTT table of order %d is too small.\n",
dev->mr_table.fmr_mtt_buddy->max_order);
err = -ENOMEM;
goto err_reserve_mtts;
}
}
return 0;
err_reserve_mtts:
err_reserve_fmr:
if (dev->limits.fmr_reserved_mtts)
mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy);
err_fmr_mtt_buddy:
if (dev->mr_table.tavor_fmr.mtt_base)
iounmap(dev->mr_table.tavor_fmr.mtt_base);
err_fmr_mtt:
if (dev->mr_table.tavor_fmr.mpt_base)
iounmap(dev->mr_table.tavor_fmr.mpt_base);
err_fmr_mpt:
mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);
err_mtt_buddy:
mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);
return err;
}
void mthca_cleanup_mr_table(struct mthca_dev *dev)
{
/* XXX check if any MRs are still allocated? */
if (dev->limits.fmr_reserved_mtts)
mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy);
mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);
if (dev->mr_table.tavor_fmr.mtt_base)
iounmap(dev->mr_table.tavor_fmr.mtt_base);
if (dev->mr_table.tavor_fmr.mpt_base)
iounmap(dev->mr_table.tavor_fmr.mpt_base);
mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);
}

View File

@ -1,81 +0,0 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/errno.h>
#include "mthca_dev.h"
int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd)
{
int err = 0;
pd->privileged = privileged;
atomic_set(&pd->sqp_count, 0);
pd->pd_num = mthca_alloc(&dev->pd_table.alloc);
if (pd->pd_num == -1)
return -ENOMEM;
if (privileged) {
err = mthca_mr_alloc_notrans(dev, pd->pd_num,
MTHCA_MPT_FLAG_LOCAL_READ |
MTHCA_MPT_FLAG_LOCAL_WRITE,
&pd->ntmr);
if (err)
mthca_free(&dev->pd_table.alloc, pd->pd_num);
}
return err;
}
void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd)
{
if (pd->privileged)
mthca_free_mr(dev, &pd->ntmr);
mthca_free(&dev->pd_table.alloc, pd->pd_num);
}
int mthca_init_pd_table(struct mthca_dev *dev)
{
return mthca_alloc_init(&dev->pd_table.alloc,
dev->limits.num_pds,
(1 << 24) - 1,
dev->limits.reserved_pds);
}
void mthca_cleanup_pd_table(struct mthca_dev *dev)
{
/* XXX check if any PDs are still allocated? */
mthca_alloc_cleanup(&dev->pd_table.alloc);
}

View File

@ -1,285 +0,0 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/string.h>
#include <linux/slab.h>
#include "mthca_profile.h"
enum {
MTHCA_RES_QP,
MTHCA_RES_EEC,
MTHCA_RES_SRQ,
MTHCA_RES_CQ,
MTHCA_RES_EQP,
MTHCA_RES_EEEC,
MTHCA_RES_EQ,
MTHCA_RES_RDB,
MTHCA_RES_MCG,
MTHCA_RES_MPT,
MTHCA_RES_MTT,
MTHCA_RES_UAR,
MTHCA_RES_UDAV,
MTHCA_RES_UARC,
MTHCA_RES_NUM
};
enum {
MTHCA_NUM_EQS = 32,
MTHCA_NUM_PDS = 1 << 15
};
s64 mthca_make_profile(struct mthca_dev *dev,
struct mthca_profile *request,
struct mthca_dev_lim *dev_lim,
struct mthca_init_hca_param *init_hca)
{
struct mthca_resource {
u64 size;
u64 start;
int type;
int num;
int log_num;
};
u64 mem_base, mem_avail;
s64 total_size = 0;
struct mthca_resource *profile;
struct mthca_resource tmp;
int i, j;
profile = kzalloc(MTHCA_RES_NUM * sizeof *profile, GFP_KERNEL);
if (!profile)
return -ENOMEM;
profile[MTHCA_RES_QP].size = dev_lim->qpc_entry_sz;
profile[MTHCA_RES_EEC].size = dev_lim->eec_entry_sz;
profile[MTHCA_RES_SRQ].size = dev_lim->srq_entry_sz;
profile[MTHCA_RES_CQ].size = dev_lim->cqc_entry_sz;
profile[MTHCA_RES_EQP].size = dev_lim->eqpc_entry_sz;
profile[MTHCA_RES_EEEC].size = dev_lim->eeec_entry_sz;
profile[MTHCA_RES_EQ].size = dev_lim->eqc_entry_sz;
profile[MTHCA_RES_RDB].size = MTHCA_RDB_ENTRY_SIZE;
profile[MTHCA_RES_MCG].size = MTHCA_MGM_ENTRY_SIZE;
profile[MTHCA_RES_MPT].size = dev_lim->mpt_entry_sz;
profile[MTHCA_RES_MTT].size = dev->limits.mtt_seg_size;
profile[MTHCA_RES_UAR].size = dev_lim->uar_scratch_entry_sz;
profile[MTHCA_RES_UDAV].size = MTHCA_AV_SIZE;
profile[MTHCA_RES_UARC].size = request->uarc_size;
profile[MTHCA_RES_QP].num = request->num_qp;
profile[MTHCA_RES_SRQ].num = request->num_srq;
profile[MTHCA_RES_EQP].num = request->num_qp;
profile[MTHCA_RES_RDB].num = request->num_qp * request->rdb_per_qp;
profile[MTHCA_RES_CQ].num = request->num_cq;
profile[MTHCA_RES_EQ].num = MTHCA_NUM_EQS;
profile[MTHCA_RES_MCG].num = request->num_mcg;
profile[MTHCA_RES_MPT].num = request->num_mpt;
profile[MTHCA_RES_MTT].num = request->num_mtt;
profile[MTHCA_RES_UAR].num = request->num_uar;
profile[MTHCA_RES_UARC].num = request->num_uar;
profile[MTHCA_RES_UDAV].num = request->num_udav;
for (i = 0; i < MTHCA_RES_NUM; ++i) {
profile[i].type = i;
profile[i].log_num = max(ffs(profile[i].num) - 1, 0);
profile[i].size *= profile[i].num;
if (mthca_is_memfree(dev))
profile[i].size = max(profile[i].size, (u64) PAGE_SIZE);
}
if (mthca_is_memfree(dev)) {
mem_base = 0;
mem_avail = dev_lim->hca.arbel.max_icm_sz;
} else {
mem_base = dev->ddr_start;
mem_avail = dev->fw.tavor.fw_start - dev->ddr_start;
}
/*
* Sort the resources in decreasing order of size. Since they
* all have sizes that are powers of 2, we'll be able to keep
* resources aligned to their size and pack them without gaps
* using the sorted order.
*/
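	/*
	 * Editorial worked example (sizes invented): descending sizes
	 * 0x100000, 0x100000, 0x10000 and 0x1000 pack to starts 0x0,
	 * 0x100000, 0x200000 and 0x210000 -- every start is a multiple
	 * of its region's size, with no padding in between.
	 */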
for (i = MTHCA_RES_NUM; i > 0; --i)
for (j = 1; j < i; ++j) {
if (profile[j].size > profile[j - 1].size) {
tmp = profile[j];
profile[j] = profile[j - 1];
profile[j - 1] = tmp;
}
}
for (i = 0; i < MTHCA_RES_NUM; ++i) {
if (profile[i].size) {
profile[i].start = mem_base + total_size;
total_size += profile[i].size;
}
if (total_size > mem_avail) {
mthca_err(dev, "Profile requires 0x%llx bytes; "
"won't fit in 0x%llx bytes of context memory.\n",
(unsigned long long) total_size,
(unsigned long long) mem_avail);
kfree(profile);
return -ENOMEM;
}
if (profile[i].size)
mthca_dbg(dev, "profile[%2d]--%2d/%2d @ 0x%16llx "
"(size 0x%8llx)\n",
i, profile[i].type, profile[i].log_num,
(unsigned long long) profile[i].start,
(unsigned long long) profile[i].size);
}
if (mthca_is_memfree(dev))
mthca_dbg(dev, "HCA context memory: reserving %d KB\n",
(int) (total_size >> 10));
else
mthca_dbg(dev, "HCA memory: allocated %d KB/%d KB (%d KB free)\n",
(int) (total_size >> 10), (int) (mem_avail >> 10),
(int) ((mem_avail - total_size) >> 10));
for (i = 0; i < MTHCA_RES_NUM; ++i) {
switch (profile[i].type) {
case MTHCA_RES_QP:
dev->limits.num_qps = profile[i].num;
init_hca->qpc_base = profile[i].start;
init_hca->log_num_qps = profile[i].log_num;
break;
case MTHCA_RES_EEC:
dev->limits.num_eecs = profile[i].num;
init_hca->eec_base = profile[i].start;
init_hca->log_num_eecs = profile[i].log_num;
break;
case MTHCA_RES_SRQ:
dev->limits.num_srqs = profile[i].num;
init_hca->srqc_base = profile[i].start;
init_hca->log_num_srqs = profile[i].log_num;
break;
case MTHCA_RES_CQ:
dev->limits.num_cqs = profile[i].num;
init_hca->cqc_base = profile[i].start;
init_hca->log_num_cqs = profile[i].log_num;
break;
case MTHCA_RES_EQP:
init_hca->eqpc_base = profile[i].start;
break;
case MTHCA_RES_EEEC:
init_hca->eeec_base = profile[i].start;
break;
case MTHCA_RES_EQ:
dev->limits.num_eqs = profile[i].num;
init_hca->eqc_base = profile[i].start;
init_hca->log_num_eqs = profile[i].log_num;
break;
case MTHCA_RES_RDB:
for (dev->qp_table.rdb_shift = 0;
request->num_qp << dev->qp_table.rdb_shift < profile[i].num;
++dev->qp_table.rdb_shift)
; /* nothing */
dev->qp_table.rdb_base = (u32) profile[i].start;
init_hca->rdb_base = profile[i].start;
break;
case MTHCA_RES_MCG:
dev->limits.num_mgms = profile[i].num >> 1;
dev->limits.num_amgms = profile[i].num >> 1;
init_hca->mc_base = profile[i].start;
init_hca->log_mc_entry_sz = ffs(MTHCA_MGM_ENTRY_SIZE) - 1;
init_hca->log_mc_table_sz = profile[i].log_num;
init_hca->mc_hash_sz = 1 << (profile[i].log_num - 1);
break;
case MTHCA_RES_MPT:
dev->limits.num_mpts = profile[i].num;
dev->mr_table.mpt_base = profile[i].start;
init_hca->mpt_base = profile[i].start;
init_hca->log_mpt_sz = profile[i].log_num;
break;
case MTHCA_RES_MTT:
dev->limits.num_mtt_segs = profile[i].num;
dev->mr_table.mtt_base = profile[i].start;
init_hca->mtt_base = profile[i].start;
init_hca->mtt_seg_sz = ffs(dev->limits.mtt_seg_size) - 7;
break;
case MTHCA_RES_UAR:
dev->limits.num_uars = profile[i].num;
init_hca->uar_scratch_base = profile[i].start;
break;
case MTHCA_RES_UDAV:
dev->av_table.ddr_av_base = profile[i].start;
dev->av_table.num_ddr_avs = profile[i].num;
break;
case MTHCA_RES_UARC:
dev->uar_table.uarc_size = request->uarc_size;
dev->uar_table.uarc_base = profile[i].start;
init_hca->uarc_base = profile[i].start;
init_hca->log_uarc_sz = ffs(request->uarc_size) - 13;
init_hca->log_uar_sz = ffs(request->num_uar) - 1;
break;
default:
break;
}
}
/*
* PDs don't take any HCA memory, but we assign them as part
* of the HCA profile anyway.
*/
dev->limits.num_pds = MTHCA_NUM_PDS;
if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT &&
init_hca->log_mpt_sz > 23) {
mthca_warn(dev, "MPT table too large (requested size 2^%d >= 2^24)\n",
init_hca->log_mpt_sz);
mthca_warn(dev, "Disabling memory key throughput optimization.\n");
dev->mthca_flags &= ~MTHCA_FLAG_SINAI_OPT;
}
/*
* For Tavor, FMRs use ioremapped PCI memory. For 32 bit
* systems it may use too much vmalloc space to map all MTT
* memory, so we reserve some MTTs for FMR access, taking them
* out of the MR pool. They don't use additional memory, but
* we assign them as part of the HCA profile anyway.
*/
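	/*
	 * Editorial worked example (values invented): with 2^20 MTT
	 * segments of 64 bytes each, mapping the whole MTT table would
	 * ioremap 64 MB, a large share of the vmalloc arena a 32-bit
	 * kernel typically has -- hence the reservation below.
	 */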
if (mthca_is_memfree(dev) || BITS_PER_LONG == 64)
dev->limits.fmr_reserved_mtts = 0;
else
dev->limits.fmr_reserved_mtts = request->fmr_reserved_mtts;
kfree(profile);
return total_size;
}

View File

@ -1,59 +0,0 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MTHCA_PROFILE_H
#define MTHCA_PROFILE_H
#include "mthca_dev.h"
#include "mthca_cmd.h"
struct mthca_profile {
int num_qp;
int rdb_per_qp;
int num_srq;
int num_cq;
int num_mcg;
int num_mpt;
int num_mtt;
int num_udav;
int num_uar;
int uarc_size;
int fmr_reserved_mtts;
};
s64 mthca_make_profile(struct mthca_dev *mdev,
struct mthca_profile *request,
struct mthca_dev_lim *dev_lim,
struct mthca_init_hca_param *init_hca);
#endif /* MTHCA_PROFILE_H */

File diff suppressed because it is too large

View File

@ -1,343 +0,0 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MTHCA_PROVIDER_H
#define MTHCA_PROVIDER_H
#include <rdma/ib_verbs.h>
#include <rdma/ib_pack.h>
#define MTHCA_MPT_FLAG_ATOMIC (1 << 14)
#define MTHCA_MPT_FLAG_REMOTE_WRITE (1 << 13)
#define MTHCA_MPT_FLAG_REMOTE_READ (1 << 12)
#define MTHCA_MPT_FLAG_LOCAL_WRITE (1 << 11)
#define MTHCA_MPT_FLAG_LOCAL_READ (1 << 10)
struct mthca_buf_list {
void *buf;
DECLARE_PCI_UNMAP_ADDR(mapping)
};
union mthca_buf {
struct mthca_buf_list direct;
struct mthca_buf_list *page_list;
};
struct mthca_uar {
unsigned long pfn;
int index;
};
struct mthca_user_db_table;
struct mthca_ucontext {
struct ib_ucontext ibucontext;
struct mthca_uar uar;
struct mthca_user_db_table *db_tab;
int reg_mr_warned;
};
struct mthca_mtt;
struct mthca_mr {
struct ib_mr ibmr;
struct ib_umem *umem;
struct mthca_mtt *mtt;
};
struct mthca_fmr {
struct ib_fmr ibmr;
struct ib_fmr_attr attr;
struct mthca_mtt *mtt;
int maps;
union {
struct {
struct mthca_mpt_entry __iomem *mpt;
u64 __iomem *mtts;
} tavor;
struct {
struct mthca_mpt_entry *mpt;
__be64 *mtts;
dma_addr_t dma_handle;
} arbel;
} mem;
};
struct mthca_pd {
struct ib_pd ibpd;
u32 pd_num;
atomic_t sqp_count;
struct mthca_mr ntmr;
int privileged;
};
struct mthca_eq {
struct mthca_dev *dev;
int eqn;
u32 eqn_mask;
u32 cons_index;
u16 msi_x_vector;
u16 msi_x_entry;
int have_irq;
int nent;
struct mthca_buf_list *page_list;
struct mthca_mr mr;
};
struct mthca_av;
enum mthca_ah_type {
MTHCA_AH_ON_HCA,
MTHCA_AH_PCI_POOL,
MTHCA_AH_KMALLOC
};
struct mthca_ah {
struct ib_ah ibah;
enum mthca_ah_type type;
u32 key;
struct mthca_av *av;
dma_addr_t avdma;
};
/*
* Quick description of our CQ/QP locking scheme:
*
* We have one global lock that protects dev->cq/qp_table. Each
* struct mthca_cq/qp also has its own lock. An individual qp lock
* may be taken inside of an individual cq lock. Both cqs attached to
* a qp may be locked, with the cq with the lower cqn locked first.
* No other nesting should be done.
*
* Each struct mthca_cq/qp also has a ref count, protected by the
* corresponding table lock. The pointer from the cq/qp_table to the
* struct counts as one reference. This reference also is good for
* access through the consumer API, so modifying the CQ/QP etc doesn't
* need to take another reference. Access to a QP because of a
* completion being polled does not need a reference either.
*
* Finally, each struct mthca_cq/qp has a wait_queue_head_t for the
* destroy function to sleep on.
*
* This means that access from the consumer API requires nothing but
* taking the struct's lock.
*
* Access because of a completion event should go as follows:
* - lock cq/qp_table and look up struct
* - increment ref count in struct
* - drop cq/qp_table lock
* - lock struct, do your thing, and unlock struct
* - decrement ref count; if zero, wake up waiters
*
* To destroy a CQ/QP, we can do the following:
* - lock cq/qp_table
* - remove pointer and decrement ref count
* - unlock cq/qp_table lock
* - wait_event until ref count is zero
*
* It is the consumer's responsibility to make sure that no QP
* operations (WQE posting or state modification) are pending when a
* QP is destroyed. Also, the consumer must make sure that calls to
* qp_modify are serialized. Similarly, the consumer is responsible
* for ensuring that no CQ resize operations are pending when a CQ
* is destroyed.
*
* Possible optimizations (wait for profile data to see if/where we
* have locks bouncing between CPUs):
* - split cq/qp table lock into n separate (cache-aligned) locks,
* indexed (say) by the page in the table
* - split QP struct lock into three (one for common info, one for the
* send queue and one for the receive queue)
*/
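/*
 * Editorial sketch (not part of the original driver): a minimal
 * userspace model of the reference-counting scheme described above,
 * with a pthread mutex/condvar standing in for the table spinlock and
 * wait queue.  All names below are invented for illustration.
 */
#include <pthread.h>

struct obj {
	pthread_mutex_t table_lock;	/* plays the cq/qp_table lock */
	pthread_cond_t zero;		/* plays the wait_queue_head_t */
	int refcount;			/* table pointer counts as one ref */
};

/* Completion-event path: take a reference under the table lock. */
static void obj_get(struct obj *o)
{
	pthread_mutex_lock(&o->table_lock);
	++o->refcount;
	pthread_mutex_unlock(&o->table_lock);
}

/* Drop a reference; wake the destroyer when the count hits zero. */
static void obj_put(struct obj *o)
{
	pthread_mutex_lock(&o->table_lock);
	if (!--o->refcount)
		pthread_cond_broadcast(&o->zero);
	pthread_mutex_unlock(&o->table_lock);
}

/* Destroy path: drop the table's reference, then wait for zero. */
static void obj_destroy(struct obj *o)
{
	pthread_mutex_lock(&o->table_lock);
	--o->refcount;
	while (o->refcount)
		pthread_cond_wait(&o->zero, &o->table_lock);
	pthread_mutex_unlock(&o->table_lock);
}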
struct mthca_cq_buf {
union mthca_buf queue;
struct mthca_mr mr;
int is_direct;
};
struct mthca_cq_resize {
struct mthca_cq_buf buf;
int cqe;
enum {
CQ_RESIZE_ALLOC,
CQ_RESIZE_READY,
CQ_RESIZE_SWAPPED
} state;
};
struct mthca_cq {
struct ib_cq ibcq;
spinlock_t lock;
int refcount;
int cqn;
u32 cons_index;
struct mthca_cq_buf buf;
struct mthca_cq_resize *resize_buf;
int is_kernel;
/* Next fields are Arbel only */
int set_ci_db_index;
__be32 *set_ci_db;
int arm_db_index;
__be32 *arm_db;
int arm_sn;
wait_queue_head_t wait;
struct mutex mutex;
};
struct mthca_srq {
struct ib_srq ibsrq;
spinlock_t lock;
int refcount;
int srqn;
int max;
int max_gs;
int wqe_shift;
int first_free;
int last_free;
u16 counter; /* Arbel only */
int db_index; /* Arbel only */
__be32 *db; /* Arbel only */
void *last;
int is_direct;
u64 *wrid;
union mthca_buf queue;
struct mthca_mr mr;
wait_queue_head_t wait;
struct mutex mutex;
};
struct mthca_wq {
spinlock_t lock;
int max;
unsigned next_ind;
unsigned last_comp;
unsigned head;
unsigned tail;
void *last;
int max_gs;
int wqe_shift;
int db_index; /* Arbel only */
__be32 *db;
};
struct mthca_qp {
struct ib_qp ibqp;
int refcount;
u32 qpn;
int is_direct;
u8 port; /* for SQP and memfree use only */
u8 alt_port; /* for memfree use only */
u8 transport;
u8 state;
u8 atomic_rd_en;
u8 resp_depth;
struct mthca_mr mr;
struct mthca_wq rq;
struct mthca_wq sq;
enum ib_sig_type sq_policy;
int send_wqe_offset;
int max_inline_data;
u64 *wrid;
union mthca_buf queue;
wait_queue_head_t wait;
struct mutex mutex;
};
struct mthca_sqp {
struct mthca_qp qp;
int pkey_index;
u32 qkey;
u32 send_psn;
struct ib_ud_header ud_header;
int header_buf_size;
void *header_buf;
dma_addr_t header_dma;
};
static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
{
return container_of(ibucontext, struct mthca_ucontext, ibucontext);
}
static inline struct mthca_fmr *to_mfmr(struct ib_fmr *ibmr)
{
return container_of(ibmr, struct mthca_fmr, ibmr);
}
static inline struct mthca_mr *to_mmr(struct ib_mr *ibmr)
{
return container_of(ibmr, struct mthca_mr, ibmr);
}
static inline struct mthca_pd *to_mpd(struct ib_pd *ibpd)
{
return container_of(ibpd, struct mthca_pd, ibpd);
}
static inline struct mthca_ah *to_mah(struct ib_ah *ibah)
{
return container_of(ibah, struct mthca_ah, ibah);
}
static inline struct mthca_cq *to_mcq(struct ib_cq *ibcq)
{
return container_of(ibcq, struct mthca_cq, ibcq);
}
static inline struct mthca_srq *to_msrq(struct ib_srq *ibsrq)
{
return container_of(ibsrq, struct mthca_srq, ibsrq);
}
static inline struct mthca_qp *to_mqp(struct ib_qp *ibqp)
{
return container_of(ibqp, struct mthca_qp, ibqp);
}
static inline struct mthca_sqp *to_msqp(struct mthca_qp *qp)
{
return container_of(qp, struct mthca_sqp, qp);
}
#endif /* MTHCA_PROVIDER_H */

File diff suppressed because it is too large

View File

@ -1,303 +0,0 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
int mthca_reset(struct mthca_dev *mdev)
{
int i;
int err = 0;
u32 *hca_header = NULL;
#ifdef __linux__
u32 *bridge_header = NULL;
#endif
struct pci_dev *bridge = NULL;
#ifdef __linux__
int bridge_pcix_cap = 0;
#endif
int hca_pcie_cap = 0;
int hca_pcix_cap = 0;
u16 devctl;
u16 linkctl;
#define MTHCA_RESET_OFFSET 0xf0010
#define MTHCA_RESET_VALUE swab32(1)
/*
* Reset the chip. This is somewhat ugly because we have to
* save off the PCI header before reset and then restore it
* after the chip reboots. We skip config space offsets 22
* and 23 since those have a special meaning.
*
* To make matters worse, for Tavor (PCI-X HCA) we have to
* find the associated bridge device and save off its PCI
* header as well.
*/
if (!(mdev->mthca_flags & MTHCA_FLAG_PCIE)) {
/* Look for the bridge -- its device ID will be 2 more
than HCA's device ID. */
#ifdef __linux__
while ((bridge = pci_get_device(mdev->pdev->vendor,
mdev->pdev->device + 2,
bridge)) != NULL) {
if (bridge->hdr_type == PCI_HEADER_TYPE_BRIDGE &&
bridge->subordinate == mdev->pdev->bus) {
mthca_dbg(mdev, "Found bridge: %s\n",
pci_name(bridge));
break;
}
}
if (!bridge) {
/*
* Didn't find a bridge for a Tavor device --
* assume we're in no-bridge mode and hope for
* the best.
*/
mthca_warn(mdev, "No bridge found for %s\n",
pci_name(mdev->pdev));
}
#else
mthca_warn(mdev, "Reset on PCI-X is not supported.\n");
goto out;
#endif
}
/* For Arbel do we need to save off the full 4K PCI Express header?? */
hca_header = kmalloc(256, GFP_KERNEL);
if (!hca_header) {
err = -ENOMEM;
mthca_err(mdev, "Couldn't allocate memory to save HCA "
"PCI header, aborting.\n");
goto out;
}
for (i = 0; i < 64; ++i) {
if (i == 22 || i == 23)
continue;
if (pci_read_config_dword(mdev->pdev, i * 4, hca_header + i)) {
err = -ENODEV;
mthca_err(mdev, "Couldn't save HCA "
"PCI header, aborting.\n");
goto out;
}
}
hca_pcix_cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX);
hca_pcie_cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_EXP);
#ifdef __linux__
if (bridge) {
bridge_header = kmalloc(256, GFP_KERNEL);
if (!bridge_header) {
err = -ENOMEM;
mthca_err(mdev, "Couldn't allocate memory to save HCA "
"bridge PCI header, aborting.\n");
goto out;
}
for (i = 0; i < 64; ++i) {
if (i == 22 || i == 23)
continue;
if (pci_read_config_dword(bridge, i * 4, bridge_header + i)) {
err = -ENODEV;
mthca_err(mdev, "Couldn't save HCA bridge "
"PCI header, aborting.\n");
goto out;
}
}
bridge_pcix_cap = pci_find_capability(bridge, PCI_CAP_ID_PCIX);
if (!bridge_pcix_cap) {
err = -ENODEV;
mthca_err(mdev, "Couldn't locate HCA bridge "
"PCI-X capability, aborting.\n");
goto out;
}
}
#endif
/* actually hit reset */
{
void __iomem *reset = ioremap(pci_resource_start(mdev->pdev, 0) +
MTHCA_RESET_OFFSET, 4);
if (!reset) {
err = -ENOMEM;
mthca_err(mdev, "Couldn't map HCA reset register, "
"aborting.\n");
goto out;
}
writel(MTHCA_RESET_VALUE, reset);
iounmap(reset);
}
/* Docs say to wait one second before accessing device */
msleep(1000);
/* Now wait for PCI device to start responding again */
{
u32 v;
int c = 0;
for (c = 0; c < 100; ++c) {
if (pci_read_config_dword(bridge ? bridge : mdev->pdev, 0, &v)) {
err = -ENODEV;
mthca_err(mdev, "Couldn't access HCA after reset, "
"aborting.\n");
goto out;
}
if (v != 0xffffffff)
goto good;
msleep(100);
}
err = -ENODEV;
mthca_err(mdev, "PCI device did not come back after reset, "
"aborting.\n");
goto out;
}
good:
#ifdef __linux__
/* Now restore the PCI headers */
if (bridge) {
if (pci_write_config_dword(bridge, bridge_pcix_cap + 0x8,
bridge_header[(bridge_pcix_cap + 0x8) / 4])) {
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA bridge Upstream "
"split transaction control, aborting.\n");
goto out;
}
if (pci_write_config_dword(bridge, bridge_pcix_cap + 0xc,
bridge_header[(bridge_pcix_cap + 0xc) / 4])) {
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA bridge Downstream "
"split transaction control, aborting.\n");
goto out;
}
/*
* Bridge control register is at 0x3e, so we'll
* naturally restore it last in this loop.
*/
for (i = 0; i < 16; ++i) {
if (i * 4 == PCI_COMMAND)
continue;
if (pci_write_config_dword(bridge, i * 4, bridge_header[i])) {
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA bridge reg %x, "
"aborting.\n", i);
goto out;
}
}
if (pci_write_config_dword(bridge, PCI_COMMAND,
bridge_header[PCI_COMMAND / 4])) {
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA bridge COMMAND, "
"aborting.\n");
goto out;
}
}
#endif
if (hca_pcix_cap) {
if (pci_write_config_dword(mdev->pdev, hca_pcix_cap,
hca_header[hca_pcix_cap / 4])) {
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA PCI-X "
"command register, aborting.\n");
goto out;
}
}
if (hca_pcie_cap) {
devctl = hca_header[(hca_pcie_cap + PCI_EXP_DEVCTL) / 4];
if (pci_write_config_word(mdev->pdev, hca_pcie_cap + PCI_EXP_DEVCTL,
devctl)) {
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA PCI Express "
"Device Control register, aborting.\n");
goto out;
}
linkctl = hca_header[(hca_pcie_cap + PCI_EXP_LNKCTL) / 4];
if (pci_write_config_word(mdev->pdev, hca_pcie_cap + PCI_EXP_LNKCTL,
linkctl)) {
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA PCI Express "
"Link control register, aborting.\n");
goto out;
}
}
for (i = 0; i < 16; ++i) {
if (i * 4 == PCI_COMMAND)
continue;
if (pci_write_config_dword(mdev->pdev, i * 4, hca_header[i])) {
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA reg %x, "
"aborting.\n", i);
goto out;
}
}
if (pci_write_config_dword(mdev->pdev, PCI_COMMAND,
hca_header[PCI_COMMAND / 4])) {
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA COMMAND, "
"aborting.\n");
goto out;
}
out:
#ifdef __linux__
if (bridge)
pci_dev_put(bridge);
kfree(bridge_header);
#endif
kfree(hca_header);
return err;
}

View File

@ -1,715 +0,0 @@
/*
* Copyright (c) 2005 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <asm/io.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
#include "mthca_memfree.h"
#include "mthca_wqe.h"
enum {
MTHCA_MAX_DIRECT_SRQ_SIZE = 4 * PAGE_SIZE
};
struct mthca_tavor_srq_context {
__be64 wqe_base_ds; /* low 6 bits is descriptor size */
__be32 state_pd;
__be32 lkey;
__be32 uar;
__be16 limit_watermark;
__be16 wqe_cnt;
u32 reserved[2];
};
struct mthca_arbel_srq_context {
__be32 state_logsize_srqn;
__be32 lkey;
__be32 db_index;
__be32 logstride_usrpage;
__be64 wqe_base;
__be32 eq_pd;
__be16 limit_watermark;
__be16 wqe_cnt;
u16 reserved1;
__be16 wqe_counter;
u32 reserved2[3];
};
static void *get_wqe(struct mthca_srq *srq, int n)
{
if (srq->is_direct)
return srq->queue.direct.buf + (n << srq->wqe_shift);
else
return srq->queue.page_list[(n << srq->wqe_shift) >> PAGE_SHIFT].buf +
((n << srq->wqe_shift) & (PAGE_SIZE - 1));
}
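/*
 * Editorial worked example (values invented): with wqe_shift = 6
 * (64-byte WQEs) and 4 KB pages, WQE 70 sits at byte 70 << 6 = 4480,
 * i.e. page_list[1] at offset 384.
 */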
/*
* Return a pointer to the location within a WQE that we're using as a
* link when the WQE is in the free list. We use the imm field
* because in the Tavor case, posting a WQE may overwrite the next
* segment of the previous WQE, but a receive WQE will never touch the
* imm field. This avoids corrupting our free list if the previous
* WQE has already completed and been put on the free list when we
* post the next WQE.
*/
static inline int *wqe_to_link(void *wqe)
{
return (int *) (wqe + offsetof(struct mthca_next_seg, imm));
}
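/*
 * Editorial sketch (not part of the original driver): the free-list
 * technique described above, modelled in plain C -- a link index is
 * embedded in a field of each fixed-size slot that is otherwise
 * unused while the slot sits on the free list.  The slot layout and
 * names are invented for illustration.
 */
#include <stddef.h>

struct slot {
	int next_free;		/* plays the role of the imm field */
	char payload[60];
};

static int first_free;

static void slots_init(struct slot *s, int n)
{
	for (int i = 0; i < n; ++i)
		s[i].next_free = (i < n - 1) ? i + 1 : -1;
	first_free = 0;
}

static struct slot *slot_alloc(struct slot *s)
{
	struct slot *ret;

	if (first_free < 0)
		return NULL;	/* list exhausted */
	ret = &s[first_free];
	first_free = ret->next_free;
	return ret;
}

static void slot_free(struct slot *s, struct slot *sl)
{
	sl->next_free = first_free;	/* push back on the free list */
	first_free = (int)(sl - s);
}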
static void mthca_tavor_init_srq_context(struct mthca_dev *dev,
struct mthca_pd *pd,
struct mthca_srq *srq,
struct mthca_tavor_srq_context *context)
{
memset(context, 0, sizeof *context);
context->wqe_base_ds = cpu_to_be64(1 << (srq->wqe_shift - 4));
context->state_pd = cpu_to_be32(pd->pd_num);
context->lkey = cpu_to_be32(srq->mr.ibmr.lkey);
if (pd->ibpd.uobject)
context->uar =
cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index);
else
context->uar = cpu_to_be32(dev->driver_uar.index);
}
static void mthca_arbel_init_srq_context(struct mthca_dev *dev,
struct mthca_pd *pd,
struct mthca_srq *srq,
struct mthca_arbel_srq_context *context)
{
int logsize, max;
memset(context, 0, sizeof *context);
/*
* Put max in a temporary variable to work around a gcc bug
* triggered by ilog2() on sparc64.
*/
max = srq->max;
logsize = ilog2(max);
context->state_logsize_srqn = cpu_to_be32(logsize << 24 | srq->srqn);
context->lkey = cpu_to_be32(srq->mr.ibmr.lkey);
context->db_index = cpu_to_be32(srq->db_index);
context->logstride_usrpage = cpu_to_be32((srq->wqe_shift - 4) << 29);
if (pd->ibpd.uobject)
context->logstride_usrpage |=
cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index);
else
context->logstride_usrpage |= cpu_to_be32(dev->driver_uar.index);
context->eq_pd = cpu_to_be32(MTHCA_EQ_ASYNC << 24 | pd->pd_num);
}
static void mthca_free_srq_buf(struct mthca_dev *dev, struct mthca_srq *srq)
{
mthca_buf_free(dev, srq->max << srq->wqe_shift, &srq->queue,
srq->is_direct, &srq->mr);
kfree(srq->wrid);
}
static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd,
struct mthca_srq *srq)
{
struct mthca_data_seg *scatter;
void *wqe;
int err;
int i;
if (pd->ibpd.uobject)
return 0;
srq->wrid = kmalloc(srq->max * sizeof (u64), GFP_KERNEL);
if (!srq->wrid)
return -ENOMEM;
err = mthca_buf_alloc(dev, srq->max << srq->wqe_shift,
MTHCA_MAX_DIRECT_SRQ_SIZE,
&srq->queue, &srq->is_direct, pd, 1, &srq->mr);
if (err) {
kfree(srq->wrid);
return err;
}
/*
* Now initialize the SRQ buffer so that all of the WQEs are
* linked into the list of free WQEs. In addition, set the
* scatter list L_Keys to the sentry value of 0x100.
*/
for (i = 0; i < srq->max; ++i) {
struct mthca_next_seg *next;
next = wqe = get_wqe(srq, i);
if (i < srq->max - 1) {
*wqe_to_link(wqe) = i + 1;
next->nda_op = htonl(((i + 1) << srq->wqe_shift) | 1);
} else {
*wqe_to_link(wqe) = -1;
next->nda_op = 0;
}
for (scatter = wqe + sizeof (struct mthca_next_seg);
(void *) scatter < wqe + (1 << srq->wqe_shift);
++scatter)
scatter->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
}
srq->last = get_wqe(srq, srq->max - 1);
return 0;
}
int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
struct ib_srq_attr *attr, struct mthca_srq *srq)
{
struct mthca_mailbox *mailbox;
u8 status;
int ds;
int err;
/* Sanity check SRQ size before proceeding */
if (attr->max_wr > dev->limits.max_srq_wqes ||
attr->max_sge > dev->limits.max_srq_sge)
return -EINVAL;
srq->max = attr->max_wr;
srq->max_gs = attr->max_sge;
srq->counter = 0;
if (mthca_is_memfree(dev))
srq->max = roundup_pow_of_two(srq->max + 1);
else
srq->max = srq->max + 1;
ds = max(64UL,
roundup_pow_of_two(sizeof (struct mthca_next_seg) +
srq->max_gs * sizeof (struct mthca_data_seg)));
if (!mthca_is_memfree(dev) && (ds > dev->limits.max_desc_sz))
return -EINVAL;
srq->wqe_shift = ilog2(ds);
srq->srqn = mthca_alloc(&dev->srq_table.alloc);
if (srq->srqn == -1)
return -ENOMEM;
if (mthca_is_memfree(dev)) {
err = mthca_table_get(dev, dev->srq_table.table, srq->srqn);
if (err)
goto err_out;
if (!pd->ibpd.uobject) {
srq->db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SRQ,
srq->srqn, &srq->db);
if (srq->db_index < 0) {
err = -ENOMEM;
goto err_out_icm;
}
}
}
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox)) {
err = PTR_ERR(mailbox);
goto err_out_db;
}
err = mthca_alloc_srq_buf(dev, pd, srq);
if (err)
goto err_out_mailbox;
spin_lock_init(&srq->lock);
srq->refcount = 1;
init_waitqueue_head(&srq->wait);
mutex_init(&srq->mutex);
if (mthca_is_memfree(dev))
mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf);
else
mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf);
err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn, &status);
if (err) {
mthca_warn(dev, "SW2HW_SRQ failed (%d)\n", err);
goto err_out_free_buf;
}
if (status) {
mthca_warn(dev, "SW2HW_SRQ returned status 0x%02x\n",
status);
err = -EINVAL;
goto err_out_free_buf;
}
spin_lock_irq(&dev->srq_table.lock);
if (mthca_array_set(&dev->srq_table.srq,
srq->srqn & (dev->limits.num_srqs - 1),
srq)) {
spin_unlock_irq(&dev->srq_table.lock);
goto err_out_free_srq;
}
spin_unlock_irq(&dev->srq_table.lock);
mthca_free_mailbox(dev, mailbox);
srq->first_free = 0;
srq->last_free = srq->max - 1;
attr->max_wr = srq->max - 1;
attr->max_sge = srq->max_gs;
return 0;
err_out_free_srq:
err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn, &status);
if (err)
mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err);
else if (status)
mthca_warn(dev, "HW2SW_SRQ returned status 0x%02x\n", status);
err_out_free_buf:
if (!pd->ibpd.uobject)
mthca_free_srq_buf(dev, srq);
err_out_mailbox:
mthca_free_mailbox(dev, mailbox);
err_out_db:
if (!pd->ibpd.uobject && mthca_is_memfree(dev))
mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index);
err_out_icm:
mthca_table_put(dev, dev->srq_table.table, srq->srqn);
err_out:
mthca_free(&dev->srq_table.alloc, srq->srqn);
return err;
}
static inline int get_srq_refcount(struct mthca_dev *dev, struct mthca_srq *srq)
{
int c;
spin_lock_irq(&dev->srq_table.lock);
c = srq->refcount;
spin_unlock_irq(&dev->srq_table.lock);
return c;
}
void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq)
{
struct mthca_mailbox *mailbox;
int err;
u8 status;
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox)) {
mthca_warn(dev, "No memory for mailbox to free SRQ.\n");
return;
}
err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn, &status);
if (err)
mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err);
else if (status)
mthca_warn(dev, "HW2SW_SRQ returned status 0x%02x\n", status);
spin_lock_irq(&dev->srq_table.lock);
mthca_array_clear(&dev->srq_table.srq,
srq->srqn & (dev->limits.num_srqs - 1));
--srq->refcount;
spin_unlock_irq(&dev->srq_table.lock);
wait_event(srq->wait, !get_srq_refcount(dev, srq));
if (!srq->ibsrq.uobject) {
mthca_free_srq_buf(dev, srq);
if (mthca_is_memfree(dev))
mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index);
}
mthca_table_put(dev, dev->srq_table.table, srq->srqn);
mthca_free(&dev->srq_table.alloc, srq->srqn);
mthca_free_mailbox(dev, mailbox);
}
int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
{
struct mthca_dev *dev = to_mdev(ibsrq->device);
struct mthca_srq *srq = to_msrq(ibsrq);
int ret;
u8 status;
/* We don't support resizing SRQs (yet?) */
if (attr_mask & IB_SRQ_MAX_WR)
return -EINVAL;
if (attr_mask & IB_SRQ_LIMIT) {
u32 max_wr = mthca_is_memfree(dev) ? srq->max - 1 : srq->max;
if (attr->srq_limit > max_wr)
return -EINVAL;
mutex_lock(&srq->mutex);
ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit, &status);
mutex_unlock(&srq->mutex);
if (ret)
return ret;
if (status)
return -EINVAL;
}
return 0;
}
int mthca_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
{
struct mthca_dev *dev = to_mdev(ibsrq->device);
struct mthca_srq *srq = to_msrq(ibsrq);
struct mthca_mailbox *mailbox;
struct mthca_arbel_srq_context *arbel_ctx;
struct mthca_tavor_srq_context *tavor_ctx;
u8 status;
int err;
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
err = mthca_QUERY_SRQ(dev, srq->srqn, mailbox, &status);
if (err)
goto out;
if (mthca_is_memfree(dev)) {
arbel_ctx = mailbox->buf;
srq_attr->srq_limit = be16_to_cpu(arbel_ctx->limit_watermark);
} else {
tavor_ctx = mailbox->buf;
srq_attr->srq_limit = be16_to_cpu(tavor_ctx->limit_watermark);
}
srq_attr->max_wr = srq->max - 1;
srq_attr->max_sge = srq->max_gs;
out:
mthca_free_mailbox(dev, mailbox);
return err;
}
void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
enum ib_event_type event_type)
{
struct mthca_srq *srq;
struct ib_event event;
spin_lock(&dev->srq_table.lock);
srq = mthca_array_get(&dev->srq_table.srq, srqn & (dev->limits.num_srqs - 1));
if (srq)
++srq->refcount;
spin_unlock(&dev->srq_table.lock);
if (!srq) {
mthca_warn(dev, "Async event for bogus SRQ %08x\n", srqn);
return;
}
if (!srq->ibsrq.event_handler)
goto out;
event.device = &dev->ib_dev;
event.event = event_type;
event.element.srq = &srq->ibsrq;
srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context);
out:
spin_lock(&dev->srq_table.lock);
if (!--srq->refcount)
wake_up(&srq->wait);
spin_unlock(&dev->srq_table.lock);
}
/*
* This function must be called with IRQs disabled.
*/
void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr)
{
int ind;
struct mthca_next_seg *last_free;
ind = wqe_addr >> srq->wqe_shift;
spin_lock(&srq->lock);
last_free = get_wqe(srq, srq->last_free);
*wqe_to_link(last_free) = ind;
last_free->nda_op = htonl((ind << srq->wqe_shift) | 1);
*wqe_to_link(get_wqe(srq, ind)) = -1;
srq->last_free = ind;
spin_unlock(&srq->lock);
}
int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr)
{
struct mthca_dev *dev = to_mdev(ibsrq->device);
struct mthca_srq *srq = to_msrq(ibsrq);
unsigned long flags;
int err = 0;
int first_ind;
int ind;
int next_ind;
int nreq;
int i;
void *wqe;
void *prev_wqe;
spin_lock_irqsave(&srq->lock, flags);
first_ind = srq->first_free;
for (nreq = 0; wr; wr = wr->next) {
ind = srq->first_free;
wqe = get_wqe(srq, ind);
next_ind = *wqe_to_link(wqe);
if (unlikely(next_ind < 0)) {
mthca_err(dev, "SRQ %06x full\n", srq->srqn);
err = -ENOMEM;
*bad_wr = wr;
break;
}
prev_wqe = srq->last;
srq->last = wqe;
((struct mthca_next_seg *) wqe)->ee_nds = 0;
/* flags field will always remain 0 */
wqe += sizeof (struct mthca_next_seg);
if (unlikely(wr->num_sge > srq->max_gs)) {
err = -EINVAL;
*bad_wr = wr;
srq->last = prev_wqe;
break;
}
for (i = 0; i < wr->num_sge; ++i) {
mthca_set_data_seg(wqe, wr->sg_list + i);
wqe += sizeof (struct mthca_data_seg);
}
if (i < srq->max_gs)
mthca_set_data_seg_inval(wqe);
((struct mthca_next_seg *) prev_wqe)->ee_nds =
cpu_to_be32(MTHCA_NEXT_DBD);
srq->wrid[ind] = wr->wr_id;
srq->first_free = next_ind;
++nreq;
if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
nreq = 0;
/*
* Make sure that descriptors are written
* before doorbell is rung.
*/
wmb();
mthca_write64(first_ind << srq->wqe_shift, srq->srqn << 8,
dev->kar + MTHCA_RECEIVE_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
first_ind = srq->first_free;
}
}
if (likely(nreq)) {
/*
* Make sure that descriptors are written before
* doorbell is rung.
*/
wmb();
mthca_write64(first_ind << srq->wqe_shift, (srq->srqn << 8) | nreq,
dev->kar + MTHCA_RECEIVE_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}
/*
* Make sure doorbells don't leak out of SRQ spinlock and
* reach the HCA out of order:
*/
mmiowb();
spin_unlock_irqrestore(&srq->lock, flags);
return err;
}
int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr)
{
struct mthca_dev *dev = to_mdev(ibsrq->device);
struct mthca_srq *srq = to_msrq(ibsrq);
unsigned long flags;
int err = 0;
int ind;
int next_ind;
int nreq;
int i;
void *wqe;
spin_lock_irqsave(&srq->lock, flags);
for (nreq = 0; wr; ++nreq, wr = wr->next) {
ind = srq->first_free;
wqe = get_wqe(srq, ind);
next_ind = *wqe_to_link(wqe);
if (unlikely(next_ind < 0)) {
mthca_err(dev, "SRQ %06x full\n", srq->srqn);
err = -ENOMEM;
*bad_wr = wr;
break;
}
((struct mthca_next_seg *) wqe)->ee_nds = 0;
/* flags field will always remain 0 */
wqe += sizeof (struct mthca_next_seg);
if (unlikely(wr->num_sge > srq->max_gs)) {
err = -EINVAL;
*bad_wr = wr;
break;
}
for (i = 0; i < wr->num_sge; ++i) {
mthca_set_data_seg(wqe, wr->sg_list + i);
wqe += sizeof (struct mthca_data_seg);
}
if (i < srq->max_gs)
mthca_set_data_seg_inval(wqe);
srq->wrid[ind] = wr->wr_id;
srq->first_free = next_ind;
}
if (likely(nreq)) {
srq->counter += nreq;
/*
* Make sure that descriptors are written before
* we write doorbell record.
*/
wmb();
*srq->db = cpu_to_be32(srq->counter);
}
spin_unlock_irqrestore(&srq->lock, flags);
return err;
}
int mthca_max_srq_sge(struct mthca_dev *dev)
{
if (mthca_is_memfree(dev))
return dev->limits.max_sg;
/*
* SRQ allocations are based on powers of 2 for Tavor,
* (although they only need to be multiples of 16 bytes).
*
* Therefore, we need to base the max number of sg entries on
* the largest power of 2 descriptor size that is <= to the
* actual max WQE descriptor size, rather than return the
* max_sg value given by the firmware (which is based on WQE
* sizes as multiples of 16, not powers of 2).
*
* If SRQ implementation is changed for Tavor to be based on
* multiples of 16, the calculation below can be deleted and
* the FW max_sg value returned.
*/
return min_t(int, dev->limits.max_sg,
((1 << (fls(dev->limits.max_desc_sz) - 1)) -
sizeof (struct mthca_next_seg)) /
sizeof (struct mthca_data_seg));
}
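/*
 * Editorial worked example (values invented): if the firmware reports
 * max_desc_sz = 1000, the largest power of two that fits is 512; with
 * a 16-byte mthca_next_seg and 16-byte mthca_data_seg entries that
 * leaves (512 - 16) / 16 = 31 gather entries, not the 61 that 1000
 * bytes would naively hold.
 */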
int mthca_init_srq_table(struct mthca_dev *dev)
{
int err;
if (!(dev->mthca_flags & MTHCA_FLAG_SRQ))
return 0;
spin_lock_init(&dev->srq_table.lock);
err = mthca_alloc_init(&dev->srq_table.alloc,
dev->limits.num_srqs,
dev->limits.num_srqs - 1,
dev->limits.reserved_srqs);
if (err)
return err;
err = mthca_array_init(&dev->srq_table.srq,
dev->limits.num_srqs);
if (err)
mthca_alloc_cleanup(&dev->srq_table.alloc);
return err;
}
void mthca_cleanup_srq_table(struct mthca_dev *dev)
{
if (!(dev->mthca_flags & MTHCA_FLAG_SRQ))
return;
mthca_array_cleanup(&dev->srq_table.srq, dev->limits.num_srqs);
mthca_alloc_cleanup(&dev->srq_table.alloc);
}

View File

@ -1,78 +0,0 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/page.h>
#include "mthca_dev.h"
#include "mthca_memfree.h"
int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar)
{
uar->index = mthca_alloc(&dev->uar_table.alloc);
if (uar->index == -1)
return -ENOMEM;
uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + uar->index;
return 0;
}
void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar)
{
mthca_free(&dev->uar_table.alloc, uar->index);
}
int mthca_init_uar_table(struct mthca_dev *dev)
{
int ret;
ret = mthca_alloc_init(&dev->uar_table.alloc,
dev->limits.num_uars,
dev->limits.num_uars - 1,
dev->limits.reserved_uars + 1);
if (ret)
return ret;
ret = mthca_init_db_tab(dev);
if (ret)
mthca_alloc_cleanup(&dev->uar_table.alloc);
return ret;
}
void mthca_cleanup_uar_table(struct mthca_dev *dev)
{
mthca_cleanup_db_tab(dev);
/* XXX check if any UARs are still allocated? */
mthca_alloc_cleanup(&dev->uar_table.alloc);
}

View File

@ -1,112 +0,0 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MTHCA_USER_H
#define MTHCA_USER_H
#include <linux/types.h>
/*
* Increment this value if any changes that break userspace ABI
* compatibility are made.
*/
#define MTHCA_UVERBS_ABI_VERSION 1
/*
* Make sure that all structs defined in this file remain laid out so
* that they pack the same way on 32-bit and 64-bit architectures (to
* avoid incompatibility between 32-bit userspace and 64-bit kernels).
* In particular do not use pointer types -- pass pointers in __u64
* instead.
*/
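/*
 * Editorial sketch (not part of the original header): the convention
 * the comment above describes, shown with a struct and helper that
 * are invented for illustration -- a userspace buffer address travels
 * in a 64-bit field so the layout is identical for 32-bit and 64-bit
 * processes.
 */
#include <stdint.h>

struct demo_cmd {
	uint64_t buf_addr;	/* pointer widened to 64 bits */
	uint32_t buf_len;
	uint32_t reserved;	/* explicit padding keeps layout fixed */
};

static inline void demo_cmd_set_buf(struct demo_cmd *cmd, void *buf,
				    uint32_t len)
{
	cmd->buf_addr = (uintptr_t)buf;	/* safe on 32- and 64-bit */
	cmd->buf_len = len;
	cmd->reserved = 0;
}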
struct mthca_alloc_ucontext_resp {
__u32 qp_tab_size;
__u32 uarc_size;
};
struct mthca_alloc_pd_resp {
__u32 pdn;
__u32 reserved;
};
struct mthca_reg_mr {
/*
* Mark the memory region with a DMA attribute that causes
* in-flight DMA to be flushed when the region is written to:
*/
#define MTHCA_MR_DMASYNC 0x1
__u32 mr_attrs;
__u32 reserved;
};
struct mthca_create_cq {
__u32 lkey;
__u32 pdn;
__u64 arm_db_page;
__u64 set_db_page;
__u32 arm_db_index;
__u32 set_db_index;
};
struct mthca_create_cq_resp {
__u32 cqn;
__u32 reserved;
};
struct mthca_resize_cq {
__u32 lkey;
__u32 reserved;
};
struct mthca_create_srq {
__u32 lkey;
__u32 db_index;
__u64 db_page;
};
struct mthca_create_srq_resp {
__u32 srqn;
__u32 reserved;
};
struct mthca_create_qp {
__u32 lkey;
__u32 reserved;
__u64 sq_db_page;
__u64 rq_db_page;
__u32 sq_db_index;
__u32 rq_db_index;
};
#endif /* MTHCA_USER_H */

View File

@ -1,131 +0,0 @@
/*
* Copyright (c) 2005 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MTHCA_WQE_H
#define MTHCA_WQE_H

#include <linux/types.h>

enum {
	MTHCA_NEXT_DBD		= 1 << 7,
	MTHCA_NEXT_FENCE	= 1 << 6,
	MTHCA_NEXT_CQ_UPDATE	= 1 << 3,
	MTHCA_NEXT_EVENT_GEN	= 1 << 2,
	MTHCA_NEXT_SOLICIT	= 1 << 1,
	MTHCA_NEXT_IP_CSUM	= 1 << 4,
	MTHCA_NEXT_TCP_UDP_CSUM	= 1 << 5,

	MTHCA_MLX_VL15		= 1 << 17,
	MTHCA_MLX_SLR		= 1 << 16
};

enum {
	MTHCA_INVAL_LKEY			= 0x100,
	MTHCA_TAVOR_MAX_WQES_PER_RECV_DB	= 256,
	MTHCA_ARBEL_MAX_WQES_PER_SEND_DB	= 255
};

struct mthca_next_seg {
	__be32 nda_op;	/* [31:6] next WQE [4:0] next opcode */
	__be32 ee_nds;	/* [31:8] next EE [7] DBD [6] F [5:0] next WQE size */
	__be32 flags;	/* [3] CQ [2] Event [1] Solicit */
	__be32 imm;	/* immediate data */
};

struct mthca_tavor_ud_seg {
	u32    reserved1;
	__be32 lkey;
	__be64 av_addr;
	u32    reserved2[4];
	__be32 dqpn;
	__be32 qkey;
	u32    reserved3[2];
};

struct mthca_arbel_ud_seg {
	__be32 av[8];
	__be32 dqpn;
	__be32 qkey;
	u32    reserved[2];
};

struct mthca_bind_seg {
	__be32 flags;	/* [31] Atomic [30] rem write [29] rem read */
	u32    reserved;
	__be32 new_rkey;
	__be32 lkey;
	__be64 addr;
	__be64 length;
};

struct mthca_raddr_seg {
	__be64 raddr;
	__be32 rkey;
	u32    reserved;
};

struct mthca_atomic_seg {
	__be64 swap_add;
	__be64 compare;
};

struct mthca_data_seg {
	__be32 byte_count;
	__be32 lkey;
	__be64 addr;
};

struct mthca_mlx_seg {
	__be32 nda_op;
	__be32 nds;
	__be32 flags;	/* [17] VL15 [16] SLR [14:12] static rate
			   [11:8] SL [3] C [2] E */
	__be16 rlid;
	__be16 vcrc;
};

static __always_inline void mthca_set_data_seg(struct mthca_data_seg *dseg,
					       struct ib_sge *sg)
{
	dseg->byte_count = cpu_to_be32(sg->length);
	dseg->lkey       = cpu_to_be32(sg->lkey);
	dseg->addr       = cpu_to_be64(sg->addr);
}

static __always_inline void
mthca_set_data_seg_inval(struct mthca_data_seg *dseg)
{
	dseg->byte_count = 0;
	dseg->lkey       = cpu_to_be32(MTHCA_INVAL_LKEY);
	dseg->addr       = 0;
}

#endif /* MTHCA_WQE_H */
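
Two details of this header are worth unpacking. First, the bit-packing
comment on mthca_next_seg: bits [31:6] of nda_op carry the 64-byte-aligned
offset of the next WQE and bits [4:0] its opcode, so both fit in one
big-endian word. Second, mthca_set_data_seg() is simply a byte-swap of an
ib_sge into the device's big-endian on-wire layout. A standalone userspace
sketch of both follows, with stand-in types (sge, data_seg) replacing the
kernel's ib_sge and mthca_data_seg, and <sys/endian.h> (htobe32/htobe64 on
FreeBSD) replacing the kernel's cpu_to_be32/cpu_to_be64:

#include <stdint.h>
#include <sys/endian.h>	/* htobe32/htobe64; <endian.h> on glibc */

/* Stand-ins for the kernel's struct ib_sge and struct mthca_data_seg. */
struct sge { uint64_t addr; uint32_t length; uint32_t lkey; };
struct data_seg { uint32_t byte_count; uint32_t lkey; uint64_t addr; };

#define INVAL_LKEY 0x100	/* mirrors MTHCA_INVAL_LKEY above */

/*
 * Pack a next-WQE pointer and opcode as described for nda_op:
 * [31:6] 64-byte-aligned next WQE offset, [4:0] opcode.
 */
static uint32_t pack_nda_op(uint32_t next_wqe_offset, uint32_t opcode)
{
	return (htobe32((next_wqe_offset & ~0x3fu) | (opcode & 0x1fu)));
}

/* The HCA reads WQEs in big-endian, so every field is byte-swapped. */
static void set_data_seg(struct data_seg *dseg, const struct sge *sg)
{
	dseg->byte_count = htobe32(sg->length);
	dseg->lkey = htobe32(sg->lkey);
	dseg->addr = htobe64(sg->addr);
}

/*
 * A zero-length entry carrying the invalid lkey marks an unused scatter
 * slot, matching mthca_set_data_seg_inval() above.
 */
static void set_data_seg_inval(struct data_seg *dseg)
{
	dseg->byte_count = 0;
	dseg->lkey = htobe32(INVAL_LKEY);
	dseg->addr = 0;
}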

View File

@ -1,6 +0,0 @@
config INFINIBAND_MADEYE
	tristate "MAD debug viewer for InfiniBand"
	depends on INFINIBAND
	---help---
	  Prints sent and received MADs on QP 0/1 for debugging.

View File

@ -251,7 +251,6 @@ DIRDEPS+= \
	contrib/ofed/usr.lib/libibumad \
	contrib/ofed/usr.lib/libibverbs \
	contrib/ofed/usr.lib/libmlx4 \
	contrib/ofed/usr.lib/libmthca \
	contrib/ofed/usr.lib/libopensm \
	contrib/ofed/usr.lib/libosmcomp \
	contrib/ofed/usr.lib/libosmvendor \